xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dec0b4665a36c357901b00de474573dbc58fdd79)
1be1d678aSKris Buschelman 
22593348eSBarry Smith /*
3b6490206SBarry Smith     Defines the basic matrix operations for the BAIJ (compressed row)
42593348eSBarry Smith   matrix storage format.
52593348eSBarry Smith */
6c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> /*I   "petscmat.h"  I*/
7c6db04a5SJed Brown #include <petscblaslapack.h>
8af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
9af0996ceSBarry Smith #include <petsc/private/kernels/blockmatmult.h>
1043516a2dSKris Buschelman 
117ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
127ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
137ea3e4caSstefano_zampini #endif
147ea3e4caSstefano_zampini 
15b5b72c8aSIrina Sokolova #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16fd9d3c67SJed Brown PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat, MatType, MatReuse, Mat *);
17b5b72c8aSIrina Sokolova #endif
18c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
19b5b72c8aSIrina Sokolova 
20d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnReductions_SeqBAIJ(Mat A, PetscInt type, PetscReal *reductions)
21d71ae5a4SJacob Faibussowitsch {
229463ebdaSPierre Jolivet   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)A->data;
23857cbf51SRichard Tran Mills   PetscInt     m, n, i;
249463ebdaSPierre Jolivet   PetscInt     ib, jb, bs = A->rmap->bs;
259463ebdaSPierre Jolivet   MatScalar   *a_val = a_aij->a;
269463ebdaSPierre Jolivet 
279463ebdaSPierre Jolivet   PetscFunctionBegin;
289566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &n));
29857cbf51SRichard Tran Mills   for (i = 0; i < n; i++) reductions[i] = 0.0;
309463ebdaSPierre Jolivet   if (type == NORM_2) {
319463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
329463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
339463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
34857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
359463ebdaSPierre Jolivet           a_val++;
369463ebdaSPierre Jolivet         }
379463ebdaSPierre Jolivet       }
389463ebdaSPierre Jolivet     }
399463ebdaSPierre Jolivet   } else if (type == NORM_1) {
409463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
419463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
429463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
43857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
449463ebdaSPierre Jolivet           a_val++;
459463ebdaSPierre Jolivet         }
469463ebdaSPierre Jolivet       }
479463ebdaSPierre Jolivet     }
489463ebdaSPierre Jolivet   } else if (type == NORM_INFINITY) {
499463ebdaSPierre Jolivet     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
509463ebdaSPierre Jolivet       for (jb = 0; jb < bs; jb++) {
519463ebdaSPierre Jolivet         for (ib = 0; ib < bs; ib++) {
529463ebdaSPierre Jolivet           int col         = A->cmap->rstart + a_aij->j[i] * bs + jb;
53857cbf51SRichard Tran Mills           reductions[col] = PetscMax(PetscAbsScalar(*a_val), reductions[col]);
549463ebdaSPierre Jolivet           a_val++;
559463ebdaSPierre Jolivet         }
569463ebdaSPierre Jolivet       }
579463ebdaSPierre Jolivet     }
58857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
59857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
60857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
61857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
62857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
63857cbf51SRichard Tran Mills           a_val++;
64857cbf51SRichard Tran Mills         }
65857cbf51SRichard Tran Mills       }
66857cbf51SRichard Tran Mills     }
67857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
68857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[A->rmap->n / bs]; i++) {
69857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
70857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
71857cbf51SRichard Tran Mills           reductions[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
72857cbf51SRichard Tran Mills           a_val++;
73857cbf51SRichard Tran Mills         }
74857cbf51SRichard Tran Mills       }
75857cbf51SRichard Tran Mills     }
76857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
779463ebdaSPierre Jolivet   if (type == NORM_2) {
78857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
79857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
80857cbf51SRichard Tran Mills     for (i = 0; i < n; i++) reductions[i] /= m;
819463ebdaSPierre Jolivet   }
829463ebdaSPierre Jolivet   PetscFunctionReturn(0);
839463ebdaSPierre Jolivet }
849463ebdaSPierre Jolivet 
85d71ae5a4SJacob Faibussowitsch PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A, const PetscScalar **values)
86d71ae5a4SJacob Faibussowitsch {
87b01c7715SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
88de80f912SBarry Smith   PetscInt    *diag_offset, i, bs = A->rmap->bs, mbs = a->mbs, ipvt[5], bs2 = bs * bs, *v_pivots;
897f0c90edSBarry Smith   MatScalar   *v     = a->a, *odiag, *diag, work[25], *v_work;
9062bba022SBarry Smith   PetscReal    shift = 0.0;
911a9391e3SHong Zhang   PetscBool    allowzeropivot, zeropivotdetected = PETSC_FALSE;
92b01c7715SBarry Smith 
93b01c7715SBarry Smith   PetscFunctionBegin;
94a455e926SHong Zhang   allowzeropivot = PetscNot(A->erroriffailure);
95a455e926SHong Zhang 
969797317bSBarry Smith   if (a->idiagvalid) {
979797317bSBarry Smith     if (values) *values = a->idiag;
989797317bSBarry Smith     PetscFunctionReturn(0);
999797317bSBarry Smith   }
1009566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
101b01c7715SBarry Smith   diag_offset = a->diag;
1024dfa11a4SJacob Faibussowitsch   if (!a->idiag) { PetscCall(PetscMalloc1(bs2 * mbs, &a->idiag)); }
103b01c7715SBarry Smith   diag = a->idiag;
104bbead8a2SBarry Smith   if (values) *values = a->idiag;
105b01c7715SBarry Smith   /* factor and invert each block */
106521d7252SBarry Smith   switch (bs) {
107ab040260SJed Brown   case 1:
108ab040260SJed Brown     for (i = 0; i < mbs; i++) {
109ab040260SJed Brown       odiag   = v + 1 * diag_offset[i];
110ab040260SJed Brown       diag[0] = odiag[0];
111ec1892c8SHong Zhang 
112ec1892c8SHong Zhang       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
113ec1892c8SHong Zhang         if (allowzeropivot) {
1147b6c816cSBarry Smith           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1157b6c816cSBarry Smith           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
1167b6c816cSBarry Smith           A->factorerror_zeropivot_row   = i;
1179566063dSJacob Faibussowitsch           PetscCall(PetscInfo(A, "Zero pivot, row %" PetscInt_FMT "\n", i));
11898921bdaSJacob Faibussowitsch         } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot, row %" PetscInt_FMT " pivot value %g tolerance %g", i, (double)PetscAbsScalar(diag[0]), (double)PETSC_MACHINE_EPSILON);
119ec1892c8SHong Zhang       }
120ec1892c8SHong Zhang 
121d4a378daSJed Brown       diag[0] = (PetscScalar)1.0 / (diag[0] + shift);
122ab040260SJed Brown       diag += 1;
123ab040260SJed Brown     }
124ab040260SJed Brown     break;
125b01c7715SBarry Smith   case 2:
126b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
127b01c7715SBarry Smith       odiag   = v + 4 * diag_offset[i];
1289371c9d4SSatish Balay       diag[0] = odiag[0];
1299371c9d4SSatish Balay       diag[1] = odiag[1];
1309371c9d4SSatish Balay       diag[2] = odiag[2];
1319371c9d4SSatish Balay       diag[3] = odiag[3];
1329566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1337b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
134b01c7715SBarry Smith       diag += 4;
135b01c7715SBarry Smith     }
136b01c7715SBarry Smith     break;
137b01c7715SBarry Smith   case 3:
138b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
139b01c7715SBarry Smith       odiag   = v + 9 * diag_offset[i];
1409371c9d4SSatish Balay       diag[0] = odiag[0];
1419371c9d4SSatish Balay       diag[1] = odiag[1];
1429371c9d4SSatish Balay       diag[2] = odiag[2];
1439371c9d4SSatish Balay       diag[3] = odiag[3];
1449371c9d4SSatish Balay       diag[4] = odiag[4];
1459371c9d4SSatish Balay       diag[5] = odiag[5];
1469371c9d4SSatish Balay       diag[6] = odiag[6];
1479371c9d4SSatish Balay       diag[7] = odiag[7];
148b01c7715SBarry Smith       diag[8] = odiag[8];
1499566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
1507b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
151b01c7715SBarry Smith       diag += 9;
152b01c7715SBarry Smith     }
153b01c7715SBarry Smith     break;
154b01c7715SBarry Smith   case 4:
155b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
156b01c7715SBarry Smith       odiag = v + 16 * diag_offset[i];
1579566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 16));
1589566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
1597b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
160b01c7715SBarry Smith       diag += 16;
161b01c7715SBarry Smith     }
162b01c7715SBarry Smith     break;
163b01c7715SBarry Smith   case 5:
164b01c7715SBarry Smith     for (i = 0; i < mbs; i++) {
165b01c7715SBarry Smith       odiag = v + 25 * diag_offset[i];
1669566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 25));
1679566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
1687b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
169b01c7715SBarry Smith       diag += 25;
170b01c7715SBarry Smith     }
171b01c7715SBarry Smith     break;
172d49b2adcSBarry Smith   case 6:
173d49b2adcSBarry Smith     for (i = 0; i < mbs; i++) {
174d49b2adcSBarry Smith       odiag = v + 36 * diag_offset[i];
1759566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 36));
1769566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
1777b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
178d49b2adcSBarry Smith       diag += 36;
179d49b2adcSBarry Smith     }
180d49b2adcSBarry Smith     break;
181de80f912SBarry Smith   case 7:
182de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
183de80f912SBarry Smith       odiag = v + 49 * diag_offset[i];
1849566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, 49));
1859566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
1867b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
187de80f912SBarry Smith       diag += 49;
188de80f912SBarry Smith     }
189de80f912SBarry Smith     break;
190b01c7715SBarry Smith   default:
1919566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(bs, &v_work, bs, &v_pivots));
192de80f912SBarry Smith     for (i = 0; i < mbs; i++) {
193de80f912SBarry Smith       odiag = v + bs2 * diag_offset[i];
1949566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(diag, odiag, bs2));
1959566063dSJacob Faibussowitsch       PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
1967b6c816cSBarry Smith       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
197de80f912SBarry Smith       diag += bs2;
198de80f912SBarry Smith     }
1999566063dSJacob Faibussowitsch     PetscCall(PetscFree2(v_work, v_pivots));
200b01c7715SBarry Smith   }
201b01c7715SBarry Smith   a->idiagvalid = PETSC_TRUE;
202b01c7715SBarry Smith   PetscFunctionReturn(0);
203b01c7715SBarry Smith }
204b01c7715SBarry Smith 
205d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_SeqBAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
206d71ae5a4SJacob Faibussowitsch {
2076d3beeddSMatthew Knepley   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
208e48d15efSToby Isaac   PetscScalar       *x, *work, *w, *workt, *t;
209e48d15efSToby Isaac   const MatScalar   *v, *aa = a->a, *idiag;
210e48d15efSToby Isaac   const PetscScalar *b, *xb;
2115455b99fSToby Isaac   PetscScalar        s[7], xw[7] = {0}; /* avoid some compilers thinking xw is uninitialized */
212e48d15efSToby Isaac   PetscInt           m = a->mbs, i, i2, nz, bs = A->rmap->bs, bs2 = bs * bs, k, j, idx, it;
213c1ac3661SBarry Smith   const PetscInt    *diag, *ai = a->i, *aj = a->j, *vi;
214b01c7715SBarry Smith 
215b01c7715SBarry Smith   PetscFunctionBegin;
216b01c7715SBarry Smith   its = its * lits;
2175f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat");
2185f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2195f80ce2aSJacob Faibussowitsch   PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift");
2205f80ce2aSJacob Faibussowitsch   PetscCheck(omega == 1.0, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for non-trivial relaxation factor");
2215f80ce2aSJacob Faibussowitsch   PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts");
222b01c7715SBarry Smith 
2239566063dSJacob Faibussowitsch   if (!a->idiagvalid) PetscCall(MatInvertBlockDiagonal(A, NULL));
224b01c7715SBarry Smith 
225b2ec919aSToby Isaac   if (!m) PetscFunctionReturn(0);
226b01c7715SBarry Smith   diag  = a->diag;
227b01c7715SBarry Smith   idiag = a->idiag;
228de80f912SBarry Smith   k     = PetscMax(A->rmap->n, A->cmap->n);
22948a46eb9SPierre Jolivet   if (!a->mult_work) PetscCall(PetscMalloc1(k + 1, &a->mult_work));
23048a46eb9SPierre Jolivet   if (!a->sor_workt) PetscCall(PetscMalloc1(k, &a->sor_workt));
23148a46eb9SPierre Jolivet   if (!a->sor_work) PetscCall(PetscMalloc1(bs, &a->sor_work));
2323475c22fSBarry Smith   work = a->mult_work;
2333475c22fSBarry Smith   t    = a->sor_workt;
234de80f912SBarry Smith   w    = a->sor_work;
235de80f912SBarry Smith 
2369566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xx, &x));
2379566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(bb, &b));
238de80f912SBarry Smith 
239de80f912SBarry Smith   if (flag & SOR_ZERO_INITIAL_GUESS) {
240de80f912SBarry Smith     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
241e48d15efSToby Isaac       switch (bs) {
242e48d15efSToby Isaac       case 1:
243e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(x, idiag, b);
244e48d15efSToby Isaac         t[0] = b[0];
245e48d15efSToby Isaac         i2   = 1;
246e48d15efSToby Isaac         idiag += 1;
247e48d15efSToby Isaac         for (i = 1; i < m; i++) {
248e48d15efSToby Isaac           v    = aa + ai[i];
249e48d15efSToby Isaac           vi   = aj + ai[i];
250e48d15efSToby Isaac           nz   = diag[i] - ai[i];
251e48d15efSToby Isaac           s[0] = b[i2];
252e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
253e48d15efSToby Isaac             xw[0] = x[vi[j]];
254e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
255e48d15efSToby Isaac           }
256e48d15efSToby Isaac           t[i2] = s[0];
257e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
258e48d15efSToby Isaac           x[i2] = xw[0];
259e48d15efSToby Isaac           idiag += 1;
260e48d15efSToby Isaac           i2 += 1;
261e48d15efSToby Isaac         }
262e48d15efSToby Isaac         break;
263e48d15efSToby Isaac       case 2:
264e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(x, idiag, b);
2659371c9d4SSatish Balay         t[0] = b[0];
2669371c9d4SSatish Balay         t[1] = b[1];
267e48d15efSToby Isaac         i2   = 2;
268e48d15efSToby Isaac         idiag += 4;
269e48d15efSToby Isaac         for (i = 1; i < m; i++) {
270e48d15efSToby Isaac           v    = aa + 4 * ai[i];
271e48d15efSToby Isaac           vi   = aj + ai[i];
272e48d15efSToby Isaac           nz   = diag[i] - ai[i];
2739371c9d4SSatish Balay           s[0] = b[i2];
2749371c9d4SSatish Balay           s[1] = b[i2 + 1];
275e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
276e48d15efSToby Isaac             idx   = 2 * vi[j];
277e48d15efSToby Isaac             it    = 4 * j;
2789371c9d4SSatish Balay             xw[0] = x[idx];
2799371c9d4SSatish Balay             xw[1] = x[1 + idx];
280e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
281e48d15efSToby Isaac           }
2829371c9d4SSatish Balay           t[i2]     = s[0];
2839371c9d4SSatish Balay           t[i2 + 1] = s[1];
284e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
2859371c9d4SSatish Balay           x[i2]     = xw[0];
2869371c9d4SSatish Balay           x[i2 + 1] = xw[1];
287e48d15efSToby Isaac           idiag += 4;
288e48d15efSToby Isaac           i2 += 2;
289e48d15efSToby Isaac         }
290e48d15efSToby Isaac         break;
291e48d15efSToby Isaac       case 3:
292e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(x, idiag, b);
2939371c9d4SSatish Balay         t[0] = b[0];
2949371c9d4SSatish Balay         t[1] = b[1];
2959371c9d4SSatish Balay         t[2] = b[2];
296e48d15efSToby Isaac         i2   = 3;
297e48d15efSToby Isaac         idiag += 9;
298e48d15efSToby Isaac         for (i = 1; i < m; i++) {
299e48d15efSToby Isaac           v    = aa + 9 * ai[i];
300e48d15efSToby Isaac           vi   = aj + ai[i];
301e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3029371c9d4SSatish Balay           s[0] = b[i2];
3039371c9d4SSatish Balay           s[1] = b[i2 + 1];
3049371c9d4SSatish Balay           s[2] = b[i2 + 2];
305e48d15efSToby Isaac           while (nz--) {
306e48d15efSToby Isaac             idx   = 3 * (*vi++);
3079371c9d4SSatish Balay             xw[0] = x[idx];
3089371c9d4SSatish Balay             xw[1] = x[1 + idx];
3099371c9d4SSatish Balay             xw[2] = x[2 + idx];
310e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
311e48d15efSToby Isaac             v += 9;
312e48d15efSToby Isaac           }
3139371c9d4SSatish Balay           t[i2]     = s[0];
3149371c9d4SSatish Balay           t[i2 + 1] = s[1];
3159371c9d4SSatish Balay           t[i2 + 2] = s[2];
316e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
3179371c9d4SSatish Balay           x[i2]     = xw[0];
3189371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3199371c9d4SSatish Balay           x[i2 + 2] = xw[2];
320e48d15efSToby Isaac           idiag += 9;
321e48d15efSToby Isaac           i2 += 3;
322e48d15efSToby Isaac         }
323e48d15efSToby Isaac         break;
324e48d15efSToby Isaac       case 4:
325e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(x, idiag, b);
3269371c9d4SSatish Balay         t[0] = b[0];
3279371c9d4SSatish Balay         t[1] = b[1];
3289371c9d4SSatish Balay         t[2] = b[2];
3299371c9d4SSatish Balay         t[3] = b[3];
330e48d15efSToby Isaac         i2   = 4;
331e48d15efSToby Isaac         idiag += 16;
332e48d15efSToby Isaac         for (i = 1; i < m; i++) {
333e48d15efSToby Isaac           v    = aa + 16 * ai[i];
334e48d15efSToby Isaac           vi   = aj + ai[i];
335e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3369371c9d4SSatish Balay           s[0] = b[i2];
3379371c9d4SSatish Balay           s[1] = b[i2 + 1];
3389371c9d4SSatish Balay           s[2] = b[i2 + 2];
3399371c9d4SSatish Balay           s[3] = b[i2 + 3];
340e48d15efSToby Isaac           while (nz--) {
341e48d15efSToby Isaac             idx   = 4 * (*vi++);
3429371c9d4SSatish Balay             xw[0] = x[idx];
3439371c9d4SSatish Balay             xw[1] = x[1 + idx];
3449371c9d4SSatish Balay             xw[2] = x[2 + idx];
3459371c9d4SSatish Balay             xw[3] = x[3 + idx];
346e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
347e48d15efSToby Isaac             v += 16;
348e48d15efSToby Isaac           }
3499371c9d4SSatish Balay           t[i2]     = s[0];
3509371c9d4SSatish Balay           t[i2 + 1] = s[1];
3519371c9d4SSatish Balay           t[i2 + 2] = s[2];
3529371c9d4SSatish Balay           t[i2 + 3] = s[3];
353e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
3549371c9d4SSatish Balay           x[i2]     = xw[0];
3559371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3569371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3579371c9d4SSatish Balay           x[i2 + 3] = xw[3];
358e48d15efSToby Isaac           idiag += 16;
359e48d15efSToby Isaac           i2 += 4;
360e48d15efSToby Isaac         }
361e48d15efSToby Isaac         break;
362e48d15efSToby Isaac       case 5:
363e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(x, idiag, b);
3649371c9d4SSatish Balay         t[0] = b[0];
3659371c9d4SSatish Balay         t[1] = b[1];
3669371c9d4SSatish Balay         t[2] = b[2];
3679371c9d4SSatish Balay         t[3] = b[3];
3689371c9d4SSatish Balay         t[4] = b[4];
369e48d15efSToby Isaac         i2   = 5;
370e48d15efSToby Isaac         idiag += 25;
371e48d15efSToby Isaac         for (i = 1; i < m; i++) {
372e48d15efSToby Isaac           v    = aa + 25 * ai[i];
373e48d15efSToby Isaac           vi   = aj + ai[i];
374e48d15efSToby Isaac           nz   = diag[i] - ai[i];
3759371c9d4SSatish Balay           s[0] = b[i2];
3769371c9d4SSatish Balay           s[1] = b[i2 + 1];
3779371c9d4SSatish Balay           s[2] = b[i2 + 2];
3789371c9d4SSatish Balay           s[3] = b[i2 + 3];
3799371c9d4SSatish Balay           s[4] = b[i2 + 4];
380e48d15efSToby Isaac           while (nz--) {
381e48d15efSToby Isaac             idx   = 5 * (*vi++);
3829371c9d4SSatish Balay             xw[0] = x[idx];
3839371c9d4SSatish Balay             xw[1] = x[1 + idx];
3849371c9d4SSatish Balay             xw[2] = x[2 + idx];
3859371c9d4SSatish Balay             xw[3] = x[3 + idx];
3869371c9d4SSatish Balay             xw[4] = x[4 + idx];
387e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
388e48d15efSToby Isaac             v += 25;
389e48d15efSToby Isaac           }
3909371c9d4SSatish Balay           t[i2]     = s[0];
3919371c9d4SSatish Balay           t[i2 + 1] = s[1];
3929371c9d4SSatish Balay           t[i2 + 2] = s[2];
3939371c9d4SSatish Balay           t[i2 + 3] = s[3];
3949371c9d4SSatish Balay           t[i2 + 4] = s[4];
395e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
3969371c9d4SSatish Balay           x[i2]     = xw[0];
3979371c9d4SSatish Balay           x[i2 + 1] = xw[1];
3989371c9d4SSatish Balay           x[i2 + 2] = xw[2];
3999371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4009371c9d4SSatish Balay           x[i2 + 4] = xw[4];
401e48d15efSToby Isaac           idiag += 25;
402e48d15efSToby Isaac           i2 += 5;
403e48d15efSToby Isaac         }
404e48d15efSToby Isaac         break;
405e48d15efSToby Isaac       case 6:
406e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(x, idiag, b);
4079371c9d4SSatish Balay         t[0] = b[0];
4089371c9d4SSatish Balay         t[1] = b[1];
4099371c9d4SSatish Balay         t[2] = b[2];
4109371c9d4SSatish Balay         t[3] = b[3];
4119371c9d4SSatish Balay         t[4] = b[4];
4129371c9d4SSatish Balay         t[5] = b[5];
413e48d15efSToby Isaac         i2   = 6;
414e48d15efSToby Isaac         idiag += 36;
415e48d15efSToby Isaac         for (i = 1; i < m; i++) {
416e48d15efSToby Isaac           v    = aa + 36 * ai[i];
417e48d15efSToby Isaac           vi   = aj + ai[i];
418e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4199371c9d4SSatish Balay           s[0] = b[i2];
4209371c9d4SSatish Balay           s[1] = b[i2 + 1];
4219371c9d4SSatish Balay           s[2] = b[i2 + 2];
4229371c9d4SSatish Balay           s[3] = b[i2 + 3];
4239371c9d4SSatish Balay           s[4] = b[i2 + 4];
4249371c9d4SSatish Balay           s[5] = b[i2 + 5];
425e48d15efSToby Isaac           while (nz--) {
426e48d15efSToby Isaac             idx   = 6 * (*vi++);
4279371c9d4SSatish Balay             xw[0] = x[idx];
4289371c9d4SSatish Balay             xw[1] = x[1 + idx];
4299371c9d4SSatish Balay             xw[2] = x[2 + idx];
4309371c9d4SSatish Balay             xw[3] = x[3 + idx];
4319371c9d4SSatish Balay             xw[4] = x[4 + idx];
4329371c9d4SSatish Balay             xw[5] = x[5 + idx];
433e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
434e48d15efSToby Isaac             v += 36;
435e48d15efSToby Isaac           }
4369371c9d4SSatish Balay           t[i2]     = s[0];
4379371c9d4SSatish Balay           t[i2 + 1] = s[1];
4389371c9d4SSatish Balay           t[i2 + 2] = s[2];
4399371c9d4SSatish Balay           t[i2 + 3] = s[3];
4409371c9d4SSatish Balay           t[i2 + 4] = s[4];
4419371c9d4SSatish Balay           t[i2 + 5] = s[5];
442e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
4439371c9d4SSatish Balay           x[i2]     = xw[0];
4449371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4459371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4469371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4479371c9d4SSatish Balay           x[i2 + 4] = xw[4];
4489371c9d4SSatish Balay           x[i2 + 5] = xw[5];
449e48d15efSToby Isaac           idiag += 36;
450e48d15efSToby Isaac           i2 += 6;
451e48d15efSToby Isaac         }
452e48d15efSToby Isaac         break;
453e48d15efSToby Isaac       case 7:
454e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
4559371c9d4SSatish Balay         t[0] = b[0];
4569371c9d4SSatish Balay         t[1] = b[1];
4579371c9d4SSatish Balay         t[2] = b[2];
4589371c9d4SSatish Balay         t[3] = b[3];
4599371c9d4SSatish Balay         t[4] = b[4];
4609371c9d4SSatish Balay         t[5] = b[5];
4619371c9d4SSatish Balay         t[6] = b[6];
462e48d15efSToby Isaac         i2   = 7;
463e48d15efSToby Isaac         idiag += 49;
464e48d15efSToby Isaac         for (i = 1; i < m; i++) {
465e48d15efSToby Isaac           v    = aa + 49 * ai[i];
466e48d15efSToby Isaac           vi   = aj + ai[i];
467e48d15efSToby Isaac           nz   = diag[i] - ai[i];
4689371c9d4SSatish Balay           s[0] = b[i2];
4699371c9d4SSatish Balay           s[1] = b[i2 + 1];
4709371c9d4SSatish Balay           s[2] = b[i2 + 2];
4719371c9d4SSatish Balay           s[3] = b[i2 + 3];
4729371c9d4SSatish Balay           s[4] = b[i2 + 4];
4739371c9d4SSatish Balay           s[5] = b[i2 + 5];
4749371c9d4SSatish Balay           s[6] = b[i2 + 6];
475e48d15efSToby Isaac           while (nz--) {
476e48d15efSToby Isaac             idx   = 7 * (*vi++);
4779371c9d4SSatish Balay             xw[0] = x[idx];
4789371c9d4SSatish Balay             xw[1] = x[1 + idx];
4799371c9d4SSatish Balay             xw[2] = x[2 + idx];
4809371c9d4SSatish Balay             xw[3] = x[3 + idx];
4819371c9d4SSatish Balay             xw[4] = x[4 + idx];
4829371c9d4SSatish Balay             xw[5] = x[5 + idx];
4839371c9d4SSatish Balay             xw[6] = x[6 + idx];
484e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
485e48d15efSToby Isaac             v += 49;
486e48d15efSToby Isaac           }
4879371c9d4SSatish Balay           t[i2]     = s[0];
4889371c9d4SSatish Balay           t[i2 + 1] = s[1];
4899371c9d4SSatish Balay           t[i2 + 2] = s[2];
4909371c9d4SSatish Balay           t[i2 + 3] = s[3];
4919371c9d4SSatish Balay           t[i2 + 4] = s[4];
4929371c9d4SSatish Balay           t[i2 + 5] = s[5];
4939371c9d4SSatish Balay           t[i2 + 6] = s[6];
494e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
4959371c9d4SSatish Balay           x[i2]     = xw[0];
4969371c9d4SSatish Balay           x[i2 + 1] = xw[1];
4979371c9d4SSatish Balay           x[i2 + 2] = xw[2];
4989371c9d4SSatish Balay           x[i2 + 3] = xw[3];
4999371c9d4SSatish Balay           x[i2 + 4] = xw[4];
5009371c9d4SSatish Balay           x[i2 + 5] = xw[5];
5019371c9d4SSatish Balay           x[i2 + 6] = xw[6];
502e48d15efSToby Isaac           idiag += 49;
503e48d15efSToby Isaac           i2 += 7;
504e48d15efSToby Isaac         }
505e48d15efSToby Isaac         break;
506e48d15efSToby Isaac       default:
50796b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x);
5089566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(t, b, bs));
509de80f912SBarry Smith         i2 = bs;
510de80f912SBarry Smith         idiag += bs2;
511de80f912SBarry Smith         for (i = 1; i < m; i++) {
512de80f912SBarry Smith           v  = aa + bs2 * ai[i];
513de80f912SBarry Smith           vi = aj + ai[i];
514de80f912SBarry Smith           nz = diag[i] - ai[i];
515de80f912SBarry Smith 
5169566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
517de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
518de80f912SBarry Smith           workt = work;
519de80f912SBarry Smith           for (j = 0; j < nz; j++) {
5209566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
521de80f912SBarry Smith             workt += bs;
522de80f912SBarry Smith           }
52396b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
5249566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(t + i2, w, bs));
52596b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
526de80f912SBarry Smith 
527de80f912SBarry Smith           idiag += bs2;
528de80f912SBarry Smith           i2 += bs;
529de80f912SBarry Smith         }
530e48d15efSToby Isaac         break;
531e48d15efSToby Isaac       }
532de80f912SBarry Smith       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
5339566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * a->nz));
534e48d15efSToby Isaac       xb = t;
5359371c9d4SSatish Balay     } else xb = b;
536de80f912SBarry Smith     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
537e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
538e48d15efSToby Isaac       i2    = bs * (m - 1);
539e48d15efSToby Isaac       switch (bs) {
540e48d15efSToby Isaac       case 1:
541e48d15efSToby Isaac         s[0] = xb[i2];
542e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
543e48d15efSToby Isaac         x[i2] = xw[0];
544e48d15efSToby Isaac         i2 -= 1;
545e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
546e48d15efSToby Isaac           v    = aa + (diag[i] + 1);
547e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
548e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
549e48d15efSToby Isaac           s[0] = xb[i2];
550e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
551e48d15efSToby Isaac             xw[0] = x[vi[j]];
552e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
553e48d15efSToby Isaac           }
554e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
555e48d15efSToby Isaac           x[i2] = xw[0];
556e48d15efSToby Isaac           idiag -= 1;
557e48d15efSToby Isaac           i2 -= 1;
558e48d15efSToby Isaac         }
559e48d15efSToby Isaac         break;
560e48d15efSToby Isaac       case 2:
5619371c9d4SSatish Balay         s[0] = xb[i2];
5629371c9d4SSatish Balay         s[1] = xb[i2 + 1];
563e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5649371c9d4SSatish Balay         x[i2]     = xw[0];
5659371c9d4SSatish Balay         x[i2 + 1] = xw[1];
566e48d15efSToby Isaac         i2 -= 2;
567e48d15efSToby Isaac         idiag -= 4;
568e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
569e48d15efSToby Isaac           v    = aa + 4 * (diag[i] + 1);
570e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
571e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
5729371c9d4SSatish Balay           s[0] = xb[i2];
5739371c9d4SSatish Balay           s[1] = xb[i2 + 1];
574e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
575e48d15efSToby Isaac             idx   = 2 * vi[j];
576e48d15efSToby Isaac             it    = 4 * j;
5779371c9d4SSatish Balay             xw[0] = x[idx];
5789371c9d4SSatish Balay             xw[1] = x[1 + idx];
579e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
580e48d15efSToby Isaac           }
581e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
5829371c9d4SSatish Balay           x[i2]     = xw[0];
5839371c9d4SSatish Balay           x[i2 + 1] = xw[1];
584e48d15efSToby Isaac           idiag -= 4;
585e48d15efSToby Isaac           i2 -= 2;
586e48d15efSToby Isaac         }
587e48d15efSToby Isaac         break;
588e48d15efSToby Isaac       case 3:
5899371c9d4SSatish Balay         s[0] = xb[i2];
5909371c9d4SSatish Balay         s[1] = xb[i2 + 1];
5919371c9d4SSatish Balay         s[2] = xb[i2 + 2];
592e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
5939371c9d4SSatish Balay         x[i2]     = xw[0];
5949371c9d4SSatish Balay         x[i2 + 1] = xw[1];
5959371c9d4SSatish Balay         x[i2 + 2] = xw[2];
596e48d15efSToby Isaac         i2 -= 3;
597e48d15efSToby Isaac         idiag -= 9;
598e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
599e48d15efSToby Isaac           v    = aa + 9 * (diag[i] + 1);
600e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
601e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6029371c9d4SSatish Balay           s[0] = xb[i2];
6039371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6049371c9d4SSatish Balay           s[2] = xb[i2 + 2];
605e48d15efSToby Isaac           while (nz--) {
606e48d15efSToby Isaac             idx   = 3 * (*vi++);
6079371c9d4SSatish Balay             xw[0] = x[idx];
6089371c9d4SSatish Balay             xw[1] = x[1 + idx];
6099371c9d4SSatish Balay             xw[2] = x[2 + idx];
610e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
611e48d15efSToby Isaac             v += 9;
612e48d15efSToby Isaac           }
613e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
6149371c9d4SSatish Balay           x[i2]     = xw[0];
6159371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6169371c9d4SSatish Balay           x[i2 + 2] = xw[2];
617e48d15efSToby Isaac           idiag -= 9;
618e48d15efSToby Isaac           i2 -= 3;
619e48d15efSToby Isaac         }
620e48d15efSToby Isaac         break;
621e48d15efSToby Isaac       case 4:
6229371c9d4SSatish Balay         s[0] = xb[i2];
6239371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6249371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6259371c9d4SSatish Balay         s[3] = xb[i2 + 3];
626e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6279371c9d4SSatish Balay         x[i2]     = xw[0];
6289371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6299371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6309371c9d4SSatish Balay         x[i2 + 3] = xw[3];
631e48d15efSToby Isaac         i2 -= 4;
632e48d15efSToby Isaac         idiag -= 16;
633e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
634e48d15efSToby Isaac           v    = aa + 16 * (diag[i] + 1);
635e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
636e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6379371c9d4SSatish Balay           s[0] = xb[i2];
6389371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6399371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6409371c9d4SSatish Balay           s[3] = xb[i2 + 3];
641e48d15efSToby Isaac           while (nz--) {
642e48d15efSToby Isaac             idx   = 4 * (*vi++);
6439371c9d4SSatish Balay             xw[0] = x[idx];
6449371c9d4SSatish Balay             xw[1] = x[1 + idx];
6459371c9d4SSatish Balay             xw[2] = x[2 + idx];
6469371c9d4SSatish Balay             xw[3] = x[3 + idx];
647e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
648e48d15efSToby Isaac             v += 16;
649e48d15efSToby Isaac           }
650e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
6519371c9d4SSatish Balay           x[i2]     = xw[0];
6529371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6539371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6549371c9d4SSatish Balay           x[i2 + 3] = xw[3];
655e48d15efSToby Isaac           idiag -= 16;
656e48d15efSToby Isaac           i2 -= 4;
657e48d15efSToby Isaac         }
658e48d15efSToby Isaac         break;
659e48d15efSToby Isaac       case 5:
6609371c9d4SSatish Balay         s[0] = xb[i2];
6619371c9d4SSatish Balay         s[1] = xb[i2 + 1];
6629371c9d4SSatish Balay         s[2] = xb[i2 + 2];
6639371c9d4SSatish Balay         s[3] = xb[i2 + 3];
6649371c9d4SSatish Balay         s[4] = xb[i2 + 4];
665e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6669371c9d4SSatish Balay         x[i2]     = xw[0];
6679371c9d4SSatish Balay         x[i2 + 1] = xw[1];
6689371c9d4SSatish Balay         x[i2 + 2] = xw[2];
6699371c9d4SSatish Balay         x[i2 + 3] = xw[3];
6709371c9d4SSatish Balay         x[i2 + 4] = xw[4];
671e48d15efSToby Isaac         i2 -= 5;
672e48d15efSToby Isaac         idiag -= 25;
673e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
674e48d15efSToby Isaac           v    = aa + 25 * (diag[i] + 1);
675e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
676e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
6779371c9d4SSatish Balay           s[0] = xb[i2];
6789371c9d4SSatish Balay           s[1] = xb[i2 + 1];
6799371c9d4SSatish Balay           s[2] = xb[i2 + 2];
6809371c9d4SSatish Balay           s[3] = xb[i2 + 3];
6819371c9d4SSatish Balay           s[4] = xb[i2 + 4];
682e48d15efSToby Isaac           while (nz--) {
683e48d15efSToby Isaac             idx   = 5 * (*vi++);
6849371c9d4SSatish Balay             xw[0] = x[idx];
6859371c9d4SSatish Balay             xw[1] = x[1 + idx];
6869371c9d4SSatish Balay             xw[2] = x[2 + idx];
6879371c9d4SSatish Balay             xw[3] = x[3 + idx];
6889371c9d4SSatish Balay             xw[4] = x[4 + idx];
689e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
690e48d15efSToby Isaac             v += 25;
691e48d15efSToby Isaac           }
692e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
6939371c9d4SSatish Balay           x[i2]     = xw[0];
6949371c9d4SSatish Balay           x[i2 + 1] = xw[1];
6959371c9d4SSatish Balay           x[i2 + 2] = xw[2];
6969371c9d4SSatish Balay           x[i2 + 3] = xw[3];
6979371c9d4SSatish Balay           x[i2 + 4] = xw[4];
698e48d15efSToby Isaac           idiag -= 25;
699e48d15efSToby Isaac           i2 -= 5;
700e48d15efSToby Isaac         }
701e48d15efSToby Isaac         break;
702e48d15efSToby Isaac       case 6:
7039371c9d4SSatish Balay         s[0] = xb[i2];
7049371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7059371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7069371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7079371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7089371c9d4SSatish Balay         s[5] = xb[i2 + 5];
709e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7109371c9d4SSatish Balay         x[i2]     = xw[0];
7119371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7129371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7139371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7149371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7159371c9d4SSatish Balay         x[i2 + 5] = xw[5];
716e48d15efSToby Isaac         i2 -= 6;
717e48d15efSToby Isaac         idiag -= 36;
718e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
719e48d15efSToby Isaac           v    = aa + 36 * (diag[i] + 1);
720e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
721e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7229371c9d4SSatish Balay           s[0] = xb[i2];
7239371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7249371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7259371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7269371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7279371c9d4SSatish Balay           s[5] = xb[i2 + 5];
728e48d15efSToby Isaac           while (nz--) {
729e48d15efSToby Isaac             idx   = 6 * (*vi++);
7309371c9d4SSatish Balay             xw[0] = x[idx];
7319371c9d4SSatish Balay             xw[1] = x[1 + idx];
7329371c9d4SSatish Balay             xw[2] = x[2 + idx];
7339371c9d4SSatish Balay             xw[3] = x[3 + idx];
7349371c9d4SSatish Balay             xw[4] = x[4 + idx];
7359371c9d4SSatish Balay             xw[5] = x[5 + idx];
736e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
737e48d15efSToby Isaac             v += 36;
738e48d15efSToby Isaac           }
739e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
7409371c9d4SSatish Balay           x[i2]     = xw[0];
7419371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7429371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7439371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7449371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7459371c9d4SSatish Balay           x[i2 + 5] = xw[5];
746e48d15efSToby Isaac           idiag -= 36;
747e48d15efSToby Isaac           i2 -= 6;
748e48d15efSToby Isaac         }
749e48d15efSToby Isaac         break;
750e48d15efSToby Isaac       case 7:
7519371c9d4SSatish Balay         s[0] = xb[i2];
7529371c9d4SSatish Balay         s[1] = xb[i2 + 1];
7539371c9d4SSatish Balay         s[2] = xb[i2 + 2];
7549371c9d4SSatish Balay         s[3] = xb[i2 + 3];
7559371c9d4SSatish Balay         s[4] = xb[i2 + 4];
7569371c9d4SSatish Balay         s[5] = xb[i2 + 5];
7579371c9d4SSatish Balay         s[6] = xb[i2 + 6];
758e48d15efSToby Isaac         PetscKernel_v_gets_A_times_w_7(x, idiag, b);
7599371c9d4SSatish Balay         x[i2]     = xw[0];
7609371c9d4SSatish Balay         x[i2 + 1] = xw[1];
7619371c9d4SSatish Balay         x[i2 + 2] = xw[2];
7629371c9d4SSatish Balay         x[i2 + 3] = xw[3];
7639371c9d4SSatish Balay         x[i2 + 4] = xw[4];
7649371c9d4SSatish Balay         x[i2 + 5] = xw[5];
7659371c9d4SSatish Balay         x[i2 + 6] = xw[6];
766e48d15efSToby Isaac         i2 -= 7;
767e48d15efSToby Isaac         idiag -= 49;
768e48d15efSToby Isaac         for (i = m - 2; i >= 0; i--) {
769e48d15efSToby Isaac           v    = aa + 49 * (diag[i] + 1);
770e48d15efSToby Isaac           vi   = aj + diag[i] + 1;
771e48d15efSToby Isaac           nz   = ai[i + 1] - diag[i] - 1;
7729371c9d4SSatish Balay           s[0] = xb[i2];
7739371c9d4SSatish Balay           s[1] = xb[i2 + 1];
7749371c9d4SSatish Balay           s[2] = xb[i2 + 2];
7759371c9d4SSatish Balay           s[3] = xb[i2 + 3];
7769371c9d4SSatish Balay           s[4] = xb[i2 + 4];
7779371c9d4SSatish Balay           s[5] = xb[i2 + 5];
7789371c9d4SSatish Balay           s[6] = xb[i2 + 6];
779e48d15efSToby Isaac           while (nz--) {
780e48d15efSToby Isaac             idx   = 7 * (*vi++);
7819371c9d4SSatish Balay             xw[0] = x[idx];
7829371c9d4SSatish Balay             xw[1] = x[1 + idx];
7839371c9d4SSatish Balay             xw[2] = x[2 + idx];
7849371c9d4SSatish Balay             xw[3] = x[3 + idx];
7859371c9d4SSatish Balay             xw[4] = x[4 + idx];
7869371c9d4SSatish Balay             xw[5] = x[5 + idx];
7879371c9d4SSatish Balay             xw[6] = x[6 + idx];
788e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
789e48d15efSToby Isaac             v += 49;
790e48d15efSToby Isaac           }
791e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
7929371c9d4SSatish Balay           x[i2]     = xw[0];
7939371c9d4SSatish Balay           x[i2 + 1] = xw[1];
7949371c9d4SSatish Balay           x[i2 + 2] = xw[2];
7959371c9d4SSatish Balay           x[i2 + 3] = xw[3];
7969371c9d4SSatish Balay           x[i2 + 4] = xw[4];
7979371c9d4SSatish Balay           x[i2 + 5] = xw[5];
7989371c9d4SSatish Balay           x[i2 + 6] = xw[6];
799e48d15efSToby Isaac           idiag -= 49;
800e48d15efSToby Isaac           i2 -= 7;
801e48d15efSToby Isaac         }
802e48d15efSToby Isaac         break;
803e48d15efSToby Isaac       default:
8049566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(w, xb + i2, bs));
80596b95a6bSBarry Smith         PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
806de80f912SBarry Smith         i2 -= bs;
807e48d15efSToby Isaac         idiag -= bs2;
808de80f912SBarry Smith         for (i = m - 2; i >= 0; i--) {
809de80f912SBarry Smith           v  = aa + bs2 * (diag[i] + 1);
810de80f912SBarry Smith           vi = aj + diag[i] + 1;
811de80f912SBarry Smith           nz = ai[i + 1] - diag[i] - 1;
812de80f912SBarry Smith 
8139566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, xb + i2, bs));
814de80f912SBarry Smith           /* copy all rows of x that are needed into contiguous space */
815de80f912SBarry Smith           workt = work;
816de80f912SBarry Smith           for (j = 0; j < nz; j++) {
8179566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
818de80f912SBarry Smith             workt += bs;
819de80f912SBarry Smith           }
82096b95a6bSBarry Smith           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
82196b95a6bSBarry Smith           PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2);
822e48d15efSToby Isaac 
823de80f912SBarry Smith           idiag -= bs2;
824de80f912SBarry Smith           i2 -= bs;
825de80f912SBarry Smith         }
826e48d15efSToby Isaac         break;
827e48d15efSToby Isaac       }
8289566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
829de80f912SBarry Smith     }
830e48d15efSToby Isaac     its--;
831e48d15efSToby Isaac   }
832e48d15efSToby Isaac   while (its--) {
833e48d15efSToby Isaac     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
834e48d15efSToby Isaac       idiag = a->idiag;
835e48d15efSToby Isaac       i2    = 0;
836e48d15efSToby Isaac       switch (bs) {
837e48d15efSToby Isaac       case 1:
838e48d15efSToby Isaac         for (i = 0; i < m; i++) {
839e48d15efSToby Isaac           v    = aa + ai[i];
840e48d15efSToby Isaac           vi   = aj + ai[i];
841e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
842e48d15efSToby Isaac           s[0] = b[i2];
843e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
844e48d15efSToby Isaac             xw[0] = x[vi[j]];
845e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
846e48d15efSToby Isaac           }
847e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
848e48d15efSToby Isaac           x[i2] += xw[0];
849e48d15efSToby Isaac           idiag += 1;
850e48d15efSToby Isaac           i2 += 1;
851e48d15efSToby Isaac         }
852e48d15efSToby Isaac         break;
853e48d15efSToby Isaac       case 2:
854e48d15efSToby Isaac         for (i = 0; i < m; i++) {
855e48d15efSToby Isaac           v    = aa + 4 * ai[i];
856e48d15efSToby Isaac           vi   = aj + ai[i];
857e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8589371c9d4SSatish Balay           s[0] = b[i2];
8599371c9d4SSatish Balay           s[1] = b[i2 + 1];
860e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
861e48d15efSToby Isaac             idx   = 2 * vi[j];
862e48d15efSToby Isaac             it    = 4 * j;
8639371c9d4SSatish Balay             xw[0] = x[idx];
8649371c9d4SSatish Balay             xw[1] = x[1 + idx];
865e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
866e48d15efSToby Isaac           }
867e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
8689371c9d4SSatish Balay           x[i2] += xw[0];
8699371c9d4SSatish Balay           x[i2 + 1] += xw[1];
870e48d15efSToby Isaac           idiag += 4;
871e48d15efSToby Isaac           i2 += 2;
872e48d15efSToby Isaac         }
873e48d15efSToby Isaac         break;
874e48d15efSToby Isaac       case 3:
875e48d15efSToby Isaac         for (i = 0; i < m; i++) {
876e48d15efSToby Isaac           v    = aa + 9 * ai[i];
877e48d15efSToby Isaac           vi   = aj + ai[i];
878e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
8799371c9d4SSatish Balay           s[0] = b[i2];
8809371c9d4SSatish Balay           s[1] = b[i2 + 1];
8819371c9d4SSatish Balay           s[2] = b[i2 + 2];
882e48d15efSToby Isaac           while (nz--) {
883e48d15efSToby Isaac             idx   = 3 * (*vi++);
8849371c9d4SSatish Balay             xw[0] = x[idx];
8859371c9d4SSatish Balay             xw[1] = x[1 + idx];
8869371c9d4SSatish Balay             xw[2] = x[2 + idx];
887e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
888e48d15efSToby Isaac             v += 9;
889e48d15efSToby Isaac           }
890e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
8919371c9d4SSatish Balay           x[i2] += xw[0];
8929371c9d4SSatish Balay           x[i2 + 1] += xw[1];
8939371c9d4SSatish Balay           x[i2 + 2] += xw[2];
894e48d15efSToby Isaac           idiag += 9;
895e48d15efSToby Isaac           i2 += 3;
896e48d15efSToby Isaac         }
897e48d15efSToby Isaac         break;
898e48d15efSToby Isaac       case 4:
899e48d15efSToby Isaac         for (i = 0; i < m; i++) {
900e48d15efSToby Isaac           v    = aa + 16 * ai[i];
901e48d15efSToby Isaac           vi   = aj + ai[i];
902e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9039371c9d4SSatish Balay           s[0] = b[i2];
9049371c9d4SSatish Balay           s[1] = b[i2 + 1];
9059371c9d4SSatish Balay           s[2] = b[i2 + 2];
9069371c9d4SSatish Balay           s[3] = b[i2 + 3];
907e48d15efSToby Isaac           while (nz--) {
908e48d15efSToby Isaac             idx   = 4 * (*vi++);
9099371c9d4SSatish Balay             xw[0] = x[idx];
9109371c9d4SSatish Balay             xw[1] = x[1 + idx];
9119371c9d4SSatish Balay             xw[2] = x[2 + idx];
9129371c9d4SSatish Balay             xw[3] = x[3 + idx];
913e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
914e48d15efSToby Isaac             v += 16;
915e48d15efSToby Isaac           }
916e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
9179371c9d4SSatish Balay           x[i2] += xw[0];
9189371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9199371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9209371c9d4SSatish Balay           x[i2 + 3] += xw[3];
921e48d15efSToby Isaac           idiag += 16;
922e48d15efSToby Isaac           i2 += 4;
923e48d15efSToby Isaac         }
924e48d15efSToby Isaac         break;
925e48d15efSToby Isaac       case 5:
926e48d15efSToby Isaac         for (i = 0; i < m; i++) {
927e48d15efSToby Isaac           v    = aa + 25 * ai[i];
928e48d15efSToby Isaac           vi   = aj + ai[i];
929e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9309371c9d4SSatish Balay           s[0] = b[i2];
9319371c9d4SSatish Balay           s[1] = b[i2 + 1];
9329371c9d4SSatish Balay           s[2] = b[i2 + 2];
9339371c9d4SSatish Balay           s[3] = b[i2 + 3];
9349371c9d4SSatish Balay           s[4] = b[i2 + 4];
935e48d15efSToby Isaac           while (nz--) {
936e48d15efSToby Isaac             idx   = 5 * (*vi++);
9379371c9d4SSatish Balay             xw[0] = x[idx];
9389371c9d4SSatish Balay             xw[1] = x[1 + idx];
9399371c9d4SSatish Balay             xw[2] = x[2 + idx];
9409371c9d4SSatish Balay             xw[3] = x[3 + idx];
9419371c9d4SSatish Balay             xw[4] = x[4 + idx];
942e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
943e48d15efSToby Isaac             v += 25;
944e48d15efSToby Isaac           }
945e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
9469371c9d4SSatish Balay           x[i2] += xw[0];
9479371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9489371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9499371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9509371c9d4SSatish Balay           x[i2 + 4] += xw[4];
951e48d15efSToby Isaac           idiag += 25;
952e48d15efSToby Isaac           i2 += 5;
953e48d15efSToby Isaac         }
954e48d15efSToby Isaac         break;
955e48d15efSToby Isaac       case 6:
956e48d15efSToby Isaac         for (i = 0; i < m; i++) {
957e48d15efSToby Isaac           v    = aa + 36 * ai[i];
958e48d15efSToby Isaac           vi   = aj + ai[i];
959e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9609371c9d4SSatish Balay           s[0] = b[i2];
9619371c9d4SSatish Balay           s[1] = b[i2 + 1];
9629371c9d4SSatish Balay           s[2] = b[i2 + 2];
9639371c9d4SSatish Balay           s[3] = b[i2 + 3];
9649371c9d4SSatish Balay           s[4] = b[i2 + 4];
9659371c9d4SSatish Balay           s[5] = b[i2 + 5];
966e48d15efSToby Isaac           while (nz--) {
967e48d15efSToby Isaac             idx   = 6 * (*vi++);
9689371c9d4SSatish Balay             xw[0] = x[idx];
9699371c9d4SSatish Balay             xw[1] = x[1 + idx];
9709371c9d4SSatish Balay             xw[2] = x[2 + idx];
9719371c9d4SSatish Balay             xw[3] = x[3 + idx];
9729371c9d4SSatish Balay             xw[4] = x[4 + idx];
9739371c9d4SSatish Balay             xw[5] = x[5 + idx];
974e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
975e48d15efSToby Isaac             v += 36;
976e48d15efSToby Isaac           }
977e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
9789371c9d4SSatish Balay           x[i2] += xw[0];
9799371c9d4SSatish Balay           x[i2 + 1] += xw[1];
9809371c9d4SSatish Balay           x[i2 + 2] += xw[2];
9819371c9d4SSatish Balay           x[i2 + 3] += xw[3];
9829371c9d4SSatish Balay           x[i2 + 4] += xw[4];
9839371c9d4SSatish Balay           x[i2 + 5] += xw[5];
984e48d15efSToby Isaac           idiag += 36;
985e48d15efSToby Isaac           i2 += 6;
986e48d15efSToby Isaac         }
987e48d15efSToby Isaac         break;
988e48d15efSToby Isaac       case 7:
989e48d15efSToby Isaac         for (i = 0; i < m; i++) {
990e48d15efSToby Isaac           v    = aa + 49 * ai[i];
991e48d15efSToby Isaac           vi   = aj + ai[i];
992e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
9939371c9d4SSatish Balay           s[0] = b[i2];
9949371c9d4SSatish Balay           s[1] = b[i2 + 1];
9959371c9d4SSatish Balay           s[2] = b[i2 + 2];
9969371c9d4SSatish Balay           s[3] = b[i2 + 3];
9979371c9d4SSatish Balay           s[4] = b[i2 + 4];
9989371c9d4SSatish Balay           s[5] = b[i2 + 5];
9999371c9d4SSatish Balay           s[6] = b[i2 + 6];
1000e48d15efSToby Isaac           while (nz--) {
1001e48d15efSToby Isaac             idx   = 7 * (*vi++);
10029371c9d4SSatish Balay             xw[0] = x[idx];
10039371c9d4SSatish Balay             xw[1] = x[1 + idx];
10049371c9d4SSatish Balay             xw[2] = x[2 + idx];
10059371c9d4SSatish Balay             xw[3] = x[3 + idx];
10069371c9d4SSatish Balay             xw[4] = x[4 + idx];
10079371c9d4SSatish Balay             xw[5] = x[5 + idx];
10089371c9d4SSatish Balay             xw[6] = x[6 + idx];
1009e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1010e48d15efSToby Isaac             v += 49;
1011e48d15efSToby Isaac           }
1012e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
10139371c9d4SSatish Balay           x[i2] += xw[0];
10149371c9d4SSatish Balay           x[i2 + 1] += xw[1];
10159371c9d4SSatish Balay           x[i2 + 2] += xw[2];
10169371c9d4SSatish Balay           x[i2 + 3] += xw[3];
10179371c9d4SSatish Balay           x[i2 + 4] += xw[4];
10189371c9d4SSatish Balay           x[i2 + 5] += xw[5];
10199371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1020e48d15efSToby Isaac           idiag += 49;
1021e48d15efSToby Isaac           i2 += 7;
1022e48d15efSToby Isaac         }
1023e48d15efSToby Isaac         break;
1024e48d15efSToby Isaac       default:
1025e48d15efSToby Isaac         for (i = 0; i < m; i++) {
1026e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1027e48d15efSToby Isaac           vi = aj + ai[i];
1028e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1029e48d15efSToby Isaac 
10309566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1031e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1032e48d15efSToby Isaac           workt = work;
1033e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
10349566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1035e48d15efSToby Isaac             workt += bs;
1036e48d15efSToby Isaac           }
1037e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1038e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1039e48d15efSToby Isaac 
1040e48d15efSToby Isaac           idiag += bs2;
1041e48d15efSToby Isaac           i2 += bs;
1042e48d15efSToby Isaac         }
1043e48d15efSToby Isaac         break;
1044e48d15efSToby Isaac       }
10459566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * a->nz));
1046e48d15efSToby Isaac     }
1047e48d15efSToby Isaac     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1048e48d15efSToby Isaac       idiag = a->idiag + bs2 * (a->mbs - 1);
1049e48d15efSToby Isaac       i2    = bs * (m - 1);
1050e48d15efSToby Isaac       switch (bs) {
1051e48d15efSToby Isaac       case 1:
1052e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1053e48d15efSToby Isaac           v    = aa + ai[i];
1054e48d15efSToby Isaac           vi   = aj + ai[i];
1055e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
1056e48d15efSToby Isaac           s[0] = b[i2];
1057e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1058e48d15efSToby Isaac             xw[0] = x[vi[j]];
1059e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_1(s, (v + j), xw);
1060e48d15efSToby Isaac           }
1061e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_1(xw, idiag, s);
1062e48d15efSToby Isaac           x[i2] += xw[0];
1063e48d15efSToby Isaac           idiag -= 1;
1064e48d15efSToby Isaac           i2 -= 1;
1065e48d15efSToby Isaac         }
1066e48d15efSToby Isaac         break;
1067e48d15efSToby Isaac       case 2:
1068e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1069e48d15efSToby Isaac           v    = aa + 4 * ai[i];
1070e48d15efSToby Isaac           vi   = aj + ai[i];
1071e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10729371c9d4SSatish Balay           s[0] = b[i2];
10739371c9d4SSatish Balay           s[1] = b[i2 + 1];
1074e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
1075e48d15efSToby Isaac             idx   = 2 * vi[j];
1076e48d15efSToby Isaac             it    = 4 * j;
10779371c9d4SSatish Balay             xw[0] = x[idx];
10789371c9d4SSatish Balay             xw[1] = x[1 + idx];
1079e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_2(s, (v + it), xw);
1080e48d15efSToby Isaac           }
1081e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_2(xw, idiag, s);
10829371c9d4SSatish Balay           x[i2] += xw[0];
10839371c9d4SSatish Balay           x[i2 + 1] += xw[1];
1084e48d15efSToby Isaac           idiag -= 4;
1085e48d15efSToby Isaac           i2 -= 2;
1086e48d15efSToby Isaac         }
1087e48d15efSToby Isaac         break;
1088e48d15efSToby Isaac       case 3:
1089e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1090e48d15efSToby Isaac           v    = aa + 9 * ai[i];
1091e48d15efSToby Isaac           vi   = aj + ai[i];
1092e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
10939371c9d4SSatish Balay           s[0] = b[i2];
10949371c9d4SSatish Balay           s[1] = b[i2 + 1];
10959371c9d4SSatish Balay           s[2] = b[i2 + 2];
1096e48d15efSToby Isaac           while (nz--) {
1097e48d15efSToby Isaac             idx   = 3 * (*vi++);
10989371c9d4SSatish Balay             xw[0] = x[idx];
10999371c9d4SSatish Balay             xw[1] = x[1 + idx];
11009371c9d4SSatish Balay             xw[2] = x[2 + idx];
1101e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_3(s, v, xw);
1102e48d15efSToby Isaac             v += 9;
1103e48d15efSToby Isaac           }
1104e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_3(xw, idiag, s);
11059371c9d4SSatish Balay           x[i2] += xw[0];
11069371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11079371c9d4SSatish Balay           x[i2 + 2] += xw[2];
1108e48d15efSToby Isaac           idiag -= 9;
1109e48d15efSToby Isaac           i2 -= 3;
1110e48d15efSToby Isaac         }
1111e48d15efSToby Isaac         break;
1112e48d15efSToby Isaac       case 4:
1113e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1114e48d15efSToby Isaac           v    = aa + 16 * ai[i];
1115e48d15efSToby Isaac           vi   = aj + ai[i];
1116e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11179371c9d4SSatish Balay           s[0] = b[i2];
11189371c9d4SSatish Balay           s[1] = b[i2 + 1];
11199371c9d4SSatish Balay           s[2] = b[i2 + 2];
11209371c9d4SSatish Balay           s[3] = b[i2 + 3];
1121e48d15efSToby Isaac           while (nz--) {
1122e48d15efSToby Isaac             idx   = 4 * (*vi++);
11239371c9d4SSatish Balay             xw[0] = x[idx];
11249371c9d4SSatish Balay             xw[1] = x[1 + idx];
11259371c9d4SSatish Balay             xw[2] = x[2 + idx];
11269371c9d4SSatish Balay             xw[3] = x[3 + idx];
1127e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_4(s, v, xw);
1128e48d15efSToby Isaac             v += 16;
1129e48d15efSToby Isaac           }
1130e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_4(xw, idiag, s);
11319371c9d4SSatish Balay           x[i2] += xw[0];
11329371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11339371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11349371c9d4SSatish Balay           x[i2 + 3] += xw[3];
1135e48d15efSToby Isaac           idiag -= 16;
1136e48d15efSToby Isaac           i2 -= 4;
1137e48d15efSToby Isaac         }
1138e48d15efSToby Isaac         break;
1139e48d15efSToby Isaac       case 5:
1140e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1141e48d15efSToby Isaac           v    = aa + 25 * ai[i];
1142e48d15efSToby Isaac           vi   = aj + ai[i];
1143e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11449371c9d4SSatish Balay           s[0] = b[i2];
11459371c9d4SSatish Balay           s[1] = b[i2 + 1];
11469371c9d4SSatish Balay           s[2] = b[i2 + 2];
11479371c9d4SSatish Balay           s[3] = b[i2 + 3];
11489371c9d4SSatish Balay           s[4] = b[i2 + 4];
1149e48d15efSToby Isaac           while (nz--) {
1150e48d15efSToby Isaac             idx   = 5 * (*vi++);
11519371c9d4SSatish Balay             xw[0] = x[idx];
11529371c9d4SSatish Balay             xw[1] = x[1 + idx];
11539371c9d4SSatish Balay             xw[2] = x[2 + idx];
11549371c9d4SSatish Balay             xw[3] = x[3 + idx];
11559371c9d4SSatish Balay             xw[4] = x[4 + idx];
1156e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_5(s, v, xw);
1157e48d15efSToby Isaac             v += 25;
1158e48d15efSToby Isaac           }
1159e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_5(xw, idiag, s);
11609371c9d4SSatish Balay           x[i2] += xw[0];
11619371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11629371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11639371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11649371c9d4SSatish Balay           x[i2 + 4] += xw[4];
1165e48d15efSToby Isaac           idiag -= 25;
1166e48d15efSToby Isaac           i2 -= 5;
1167e48d15efSToby Isaac         }
1168e48d15efSToby Isaac         break;
1169e48d15efSToby Isaac       case 6:
1170e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1171e48d15efSToby Isaac           v    = aa + 36 * ai[i];
1172e48d15efSToby Isaac           vi   = aj + ai[i];
1173e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
11749371c9d4SSatish Balay           s[0] = b[i2];
11759371c9d4SSatish Balay           s[1] = b[i2 + 1];
11769371c9d4SSatish Balay           s[2] = b[i2 + 2];
11779371c9d4SSatish Balay           s[3] = b[i2 + 3];
11789371c9d4SSatish Balay           s[4] = b[i2 + 4];
11799371c9d4SSatish Balay           s[5] = b[i2 + 5];
1180e48d15efSToby Isaac           while (nz--) {
1181e48d15efSToby Isaac             idx   = 6 * (*vi++);
11829371c9d4SSatish Balay             xw[0] = x[idx];
11839371c9d4SSatish Balay             xw[1] = x[1 + idx];
11849371c9d4SSatish Balay             xw[2] = x[2 + idx];
11859371c9d4SSatish Balay             xw[3] = x[3 + idx];
11869371c9d4SSatish Balay             xw[4] = x[4 + idx];
11879371c9d4SSatish Balay             xw[5] = x[5 + idx];
1188e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_6(s, v, xw);
1189e48d15efSToby Isaac             v += 36;
1190e48d15efSToby Isaac           }
1191e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_6(xw, idiag, s);
11929371c9d4SSatish Balay           x[i2] += xw[0];
11939371c9d4SSatish Balay           x[i2 + 1] += xw[1];
11949371c9d4SSatish Balay           x[i2 + 2] += xw[2];
11959371c9d4SSatish Balay           x[i2 + 3] += xw[3];
11969371c9d4SSatish Balay           x[i2 + 4] += xw[4];
11979371c9d4SSatish Balay           x[i2 + 5] += xw[5];
1198e48d15efSToby Isaac           idiag -= 36;
1199e48d15efSToby Isaac           i2 -= 6;
1200e48d15efSToby Isaac         }
1201e48d15efSToby Isaac         break;
1202e48d15efSToby Isaac       case 7:
1203e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1204e48d15efSToby Isaac           v    = aa + 49 * ai[i];
1205e48d15efSToby Isaac           vi   = aj + ai[i];
1206e48d15efSToby Isaac           nz   = ai[i + 1] - ai[i];
12079371c9d4SSatish Balay           s[0] = b[i2];
12089371c9d4SSatish Balay           s[1] = b[i2 + 1];
12099371c9d4SSatish Balay           s[2] = b[i2 + 2];
12109371c9d4SSatish Balay           s[3] = b[i2 + 3];
12119371c9d4SSatish Balay           s[4] = b[i2 + 4];
12129371c9d4SSatish Balay           s[5] = b[i2 + 5];
12139371c9d4SSatish Balay           s[6] = b[i2 + 6];
1214e48d15efSToby Isaac           while (nz--) {
1215e48d15efSToby Isaac             idx   = 7 * (*vi++);
12169371c9d4SSatish Balay             xw[0] = x[idx];
12179371c9d4SSatish Balay             xw[1] = x[1 + idx];
12189371c9d4SSatish Balay             xw[2] = x[2 + idx];
12199371c9d4SSatish Balay             xw[3] = x[3 + idx];
12209371c9d4SSatish Balay             xw[4] = x[4 + idx];
12219371c9d4SSatish Balay             xw[5] = x[5 + idx];
12229371c9d4SSatish Balay             xw[6] = x[6 + idx];
1223e48d15efSToby Isaac             PetscKernel_v_gets_v_minus_A_times_w_7(s, v, xw);
1224e48d15efSToby Isaac             v += 49;
1225e48d15efSToby Isaac           }
1226e48d15efSToby Isaac           PetscKernel_v_gets_A_times_w_7(xw, idiag, s);
12279371c9d4SSatish Balay           x[i2] += xw[0];
12289371c9d4SSatish Balay           x[i2 + 1] += xw[1];
12299371c9d4SSatish Balay           x[i2 + 2] += xw[2];
12309371c9d4SSatish Balay           x[i2 + 3] += xw[3];
12319371c9d4SSatish Balay           x[i2 + 4] += xw[4];
12329371c9d4SSatish Balay           x[i2 + 5] += xw[5];
12339371c9d4SSatish Balay           x[i2 + 6] += xw[6];
1234e48d15efSToby Isaac           idiag -= 49;
1235e48d15efSToby Isaac           i2 -= 7;
1236e48d15efSToby Isaac         }
1237e48d15efSToby Isaac         break;
1238e48d15efSToby Isaac       default:
1239e48d15efSToby Isaac         for (i = m - 1; i >= 0; i--) {
1240e48d15efSToby Isaac           v  = aa + bs2 * ai[i];
1241e48d15efSToby Isaac           vi = aj + ai[i];
1242e48d15efSToby Isaac           nz = ai[i + 1] - ai[i];
1243e48d15efSToby Isaac 
12449566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(w, b + i2, bs));
1245e48d15efSToby Isaac           /* copy all rows of x that are needed into contiguous space */
1246e48d15efSToby Isaac           workt = work;
1247e48d15efSToby Isaac           for (j = 0; j < nz; j++) {
12489566063dSJacob Faibussowitsch             PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs));
1249e48d15efSToby Isaac             workt += bs;
1250e48d15efSToby Isaac           }
1251e48d15efSToby Isaac           PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, v, work);
1252e48d15efSToby Isaac           PetscKernel_w_gets_w_plus_Ar_times_v(bs, bs, w, idiag, x + i2);
1253e48d15efSToby Isaac 
1254e48d15efSToby Isaac           idiag -= bs2;
1255e48d15efSToby Isaac           i2 -= bs;
1256e48d15efSToby Isaac         }
1257e48d15efSToby Isaac         break;
1258e48d15efSToby Isaac       }
12599566063dSJacob Faibussowitsch       PetscCall(PetscLogFlops(2.0 * bs2 * (a->nz)));
1260e48d15efSToby Isaac     }
1261e48d15efSToby Isaac   }
12629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xx, &x));
12639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(bb, &b));
1264de80f912SBarry Smith   PetscFunctionReturn(0);
1265de80f912SBarry Smith }
1266de80f912SBarry Smith 
1267af674e45SBarry Smith /*
126881824310SBarry Smith     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1269af674e45SBarry Smith */
1270af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1271af674e45SBarry Smith   #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1272af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1273af674e45SBarry Smith   #define matsetvaluesblocked4_ matsetvaluesblocked4
1274af674e45SBarry Smith #endif
1275af674e45SBarry Smith 
1276d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA, PetscInt *mm, const PetscInt im[], PetscInt *nn, const PetscInt in[], const PetscScalar v[])
1277d71ae5a4SJacob Faibussowitsch {
1278af674e45SBarry Smith   Mat                A = *AA;
1279af674e45SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
1280c1ac3661SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, N, m = *mm, n = *nn;
1281c1ac3661SBarry Smith   PetscInt          *ai = a->i, *ailen = a->ilen;
128217ec6a02SBarry Smith   PetscInt          *aj = a->j, stepval, lastcol = -1;
1283f15d580aSBarry Smith   const PetscScalar *value = v;
12844bb09213Spetsc   MatScalar         *ap, *aa = a->a, *bap;
1285af674e45SBarry Smith 
1286af674e45SBarry Smith   PetscFunctionBegin;
1287ce94432eSBarry Smith   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Can only be called with a block size of 4");
1288af674e45SBarry Smith   stepval = (n - 1) * 4;
1289af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
1290af674e45SBarry Smith     row  = im[k];
1291af674e45SBarry Smith     rp   = aj + ai[row];
1292af674e45SBarry Smith     ap   = aa + 16 * ai[row];
1293af674e45SBarry Smith     nrow = ailen[row];
1294af674e45SBarry Smith     low  = 0;
129517ec6a02SBarry Smith     high = nrow;
1296af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
1297af674e45SBarry Smith       col = in[l];
1298db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1299db4deed7SKarl Rupp       else high = nrow;
130017ec6a02SBarry Smith       lastcol = col;
13011e3347e8SBarry Smith       value   = v + k * (stepval + 4 + l) * 4;
1302af674e45SBarry Smith       while (high - low > 7) {
1303af674e45SBarry Smith         t = (low + high) / 2;
1304af674e45SBarry Smith         if (rp[t] > col) high = t;
1305af674e45SBarry Smith         else low = t;
1306af674e45SBarry Smith       }
1307af674e45SBarry Smith       for (i = low; i < high; i++) {
1308af674e45SBarry Smith         if (rp[i] > col) break;
1309af674e45SBarry Smith         if (rp[i] == col) {
1310af674e45SBarry Smith           bap = ap + 16 * i;
1311af674e45SBarry Smith           for (ii = 0; ii < 4; ii++, value += stepval) {
1312ad540459SPierre Jolivet             for (jj = ii; jj < 16; jj += 4) bap[jj] += *value++;
1313af674e45SBarry Smith           }
1314af674e45SBarry Smith           goto noinsert2;
1315af674e45SBarry Smith         }
1316af674e45SBarry Smith       }
1317af674e45SBarry Smith       N = nrow++ - 1;
131817ec6a02SBarry Smith       high++; /* added new column index thus must search to one higher than before */
1319af674e45SBarry Smith       /* shift up all the later entries in this row */
1320af674e45SBarry Smith       for (ii = N; ii >= i; ii--) {
1321af674e45SBarry Smith         rp[ii + 1] = rp[ii];
13229566063dSJacob Faibussowitsch         PetscCallVoid(PetscArraycpy(ap + 16 * (ii + 1), ap + 16 * (ii), 16));
1323af674e45SBarry Smith       }
132448a46eb9SPierre Jolivet       if (N >= i) PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1325af674e45SBarry Smith       rp[i] = col;
1326af674e45SBarry Smith       bap   = ap + 16 * i;
1327af674e45SBarry Smith       for (ii = 0; ii < 4; ii++, value += stepval) {
1328ad540459SPierre Jolivet         for (jj = ii; jj < 16; jj += 4) bap[jj] = *value++;
1329af674e45SBarry Smith       }
1330af674e45SBarry Smith     noinsert2:;
1331af674e45SBarry Smith       low = i;
1332af674e45SBarry Smith     }
1333af674e45SBarry Smith     ailen[row] = nrow;
1334af674e45SBarry Smith   }
1335be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1336af674e45SBarry Smith }
1337af674e45SBarry Smith 
1338af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS)
1339af674e45SBarry Smith   #define matsetvalues4_ MATSETVALUES4
1340af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1341af674e45SBarry Smith   #define matsetvalues4_ matsetvalues4
1342af674e45SBarry Smith #endif
1343af674e45SBarry Smith 
1344d71ae5a4SJacob Faibussowitsch PETSC_EXTERN void matsetvalues4_(Mat *AA, PetscInt *mm, PetscInt *im, PetscInt *nn, PetscInt *in, PetscScalar *v)
1345d71ae5a4SJacob Faibussowitsch {
1346af674e45SBarry Smith   Mat          A = *AA;
1347af674e45SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
1348580bdb30SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, N, n = *nn, m = *mm;
1349c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
1350c1ac3661SBarry Smith   PetscInt    *aj = a->j, brow, bcol;
135117ec6a02SBarry Smith   PetscInt     ridx, cidx, lastcol = -1;
1352af674e45SBarry Smith   MatScalar   *ap, value, *aa      = a->a, *bap;
1353af674e45SBarry Smith 
1354af674e45SBarry Smith   PetscFunctionBegin;
1355af674e45SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
13569371c9d4SSatish Balay     row  = im[k];
13579371c9d4SSatish Balay     brow = row / 4;
1358af674e45SBarry Smith     rp   = aj + ai[brow];
1359af674e45SBarry Smith     ap   = aa + 16 * ai[brow];
1360af674e45SBarry Smith     nrow = ailen[brow];
1361af674e45SBarry Smith     low  = 0;
136217ec6a02SBarry Smith     high = nrow;
1363af674e45SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
13649371c9d4SSatish Balay       col   = in[l];
13659371c9d4SSatish Balay       bcol  = col / 4;
13669371c9d4SSatish Balay       ridx  = row % 4;
13679371c9d4SSatish Balay       cidx  = col % 4;
1368af674e45SBarry Smith       value = v[l + k * n];
1369db4deed7SKarl Rupp       if (col <= lastcol) low = 0;
1370db4deed7SKarl Rupp       else high = nrow;
137117ec6a02SBarry Smith       lastcol = col;
1372af674e45SBarry Smith       while (high - low > 7) {
1373af674e45SBarry Smith         t = (low + high) / 2;
1374af674e45SBarry Smith         if (rp[t] > bcol) high = t;
1375af674e45SBarry Smith         else low = t;
1376af674e45SBarry Smith       }
1377af674e45SBarry Smith       for (i = low; i < high; i++) {
1378af674e45SBarry Smith         if (rp[i] > bcol) break;
1379af674e45SBarry Smith         if (rp[i] == bcol) {
1380af674e45SBarry Smith           bap = ap + 16 * i + 4 * cidx + ridx;
1381af674e45SBarry Smith           *bap += value;
1382af674e45SBarry Smith           goto noinsert1;
1383af674e45SBarry Smith         }
1384af674e45SBarry Smith       }
1385af674e45SBarry Smith       N = nrow++ - 1;
138617ec6a02SBarry Smith       high++; /* added new column thus must search to one higher than before */
1387af674e45SBarry Smith       /* shift up all the later entries in this row */
13889566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
13899566063dSJacob Faibussowitsch       PetscCallVoid(PetscArraymove(ap + 16 * i + 16, ap + 16 * i, 16 * (N - i + 1)));
13909566063dSJacob Faibussowitsch       PetscCallVoid(PetscArrayzero(ap + 16 * i, 16));
1391af674e45SBarry Smith       rp[i]                        = bcol;
1392af674e45SBarry Smith       ap[16 * i + 4 * cidx + ridx] = value;
1393af674e45SBarry Smith     noinsert1:;
1394af674e45SBarry Smith       low = i;
1395af674e45SBarry Smith     }
1396af674e45SBarry Smith     ailen[brow] = nrow;
1397af674e45SBarry Smith   }
1398be1d678aSKris Buschelman   PetscFunctionReturnVoid();
1399af674e45SBarry Smith }
1400af674e45SBarry Smith 
1401be5855fcSBarry Smith /*
1402be5855fcSBarry Smith      Checks for missing diagonals
1403be5855fcSBarry Smith */
1404d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1405d71ae5a4SJacob Faibussowitsch {
1406be5855fcSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14077734d3b5SMatthew G. Knepley   PetscInt    *diag, *ii = a->i, i;
1408be5855fcSBarry Smith 
1409be5855fcSBarry Smith   PetscFunctionBegin;
14109566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(A));
14112af78befSBarry Smith   *missing = PETSC_FALSE;
14127734d3b5SMatthew G. Knepley   if (A->rmap->n > 0 && !ii) {
14132efa7f71SHong Zhang     *missing = PETSC_TRUE;
14142efa7f71SHong Zhang     if (d) *d = 0;
14159566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Matrix has no entries therefore is missing diagonal\n"));
14162efa7f71SHong Zhang   } else {
141701445905SHong Zhang     PetscInt n;
141801445905SHong Zhang     n    = PetscMin(a->mbs, a->nbs);
1419883fce79SBarry Smith     diag = a->diag;
142001445905SHong Zhang     for (i = 0; i < n; i++) {
14217734d3b5SMatthew G. Knepley       if (diag[i] >= ii[i + 1]) {
14222af78befSBarry Smith         *missing = PETSC_TRUE;
14232af78befSBarry Smith         if (d) *d = i;
14249566063dSJacob Faibussowitsch         PetscCall(PetscInfo(A, "Matrix is missing block diagonal number %" PetscInt_FMT "\n", i));
1425358d2f5dSShri Abhyankar         break;
14262efa7f71SHong Zhang       }
1427be5855fcSBarry Smith     }
1428be5855fcSBarry Smith   }
1429be5855fcSBarry Smith   PetscFunctionReturn(0);
1430be5855fcSBarry Smith }
1431be5855fcSBarry Smith 
1432d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1433d71ae5a4SJacob Faibussowitsch {
1434de6a44a3SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
143509f38230SBarry Smith   PetscInt     i, j, m = a->mbs;
1436de6a44a3SBarry Smith 
14373a40ed3dSBarry Smith   PetscFunctionBegin;
143809f38230SBarry Smith   if (!a->diag) {
14399566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(m, &a->diag));
14404fd072dbSBarry Smith     a->free_diag = PETSC_TRUE;
144109f38230SBarry Smith   }
14427fc0212eSBarry Smith   for (i = 0; i < m; i++) {
144309f38230SBarry Smith     a->diag[i] = a->i[i + 1];
1444de6a44a3SBarry Smith     for (j = a->i[i]; j < a->i[i + 1]; j++) {
1445de6a44a3SBarry Smith       if (a->j[j] == i) {
144609f38230SBarry Smith         a->diag[i] = j;
1447de6a44a3SBarry Smith         break;
1448de6a44a3SBarry Smith       }
1449de6a44a3SBarry Smith     }
1450de6a44a3SBarry Smith   }
14513a40ed3dSBarry Smith   PetscFunctionReturn(0);
1452de6a44a3SBarry Smith }
14532593348eSBarry Smith 
1454d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *inia[], const PetscInt *inja[], PetscBool *done)
1455d71ae5a4SJacob Faibussowitsch {
14563b2fbd54SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
14571a83f524SJed Brown   PetscInt     i, j, n = a->mbs, nz = a->i[n], *tia, *tja, bs = A->rmap->bs, k, l, cnt;
14581a83f524SJed Brown   PetscInt   **ia = (PetscInt **)inia, **ja = (PetscInt **)inja;
14593b2fbd54SBarry Smith 
14603a40ed3dSBarry Smith   PetscFunctionBegin;
14613b2fbd54SBarry Smith   *nn = n;
14623a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
14633b2fbd54SBarry Smith   if (symmetric) {
14649566063dSJacob Faibussowitsch     PetscCall(MatToSymmetricIJ_SeqAIJ(n, a->i, a->j, PETSC_TRUE, 0, 0, &tia, &tja));
1465553b3c51SBarry Smith     nz = tia[n];
14663b2fbd54SBarry Smith   } else {
14679371c9d4SSatish Balay     tia = a->i;
14689371c9d4SSatish Balay     tja = a->j;
14693b2fbd54SBarry Smith   }
14703b2fbd54SBarry Smith 
1471ecc77c7aSBarry Smith   if (!blockcompressed && bs > 1) {
1472ecc77c7aSBarry Smith     (*nn) *= bs;
14738f7157efSSatish Balay     /* malloc & create the natural set of indices */
14749566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1((n + 1) * bs, ia));
14759985e31cSBarry Smith     if (n) {
14762462f5fdSStefano Zampini       (*ia)[0] = oshift;
1477ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[j] = (tia[1] - tia[0]) * bs + (*ia)[j - 1];
14789985e31cSBarry Smith     }
1479ecc77c7aSBarry Smith 
1480ecc77c7aSBarry Smith     for (i = 1; i < n; i++) {
1481ecc77c7aSBarry Smith       (*ia)[i * bs] = (tia[i] - tia[i - 1]) * bs + (*ia)[i * bs - 1];
1482ad540459SPierre Jolivet       for (j = 1; j < bs; j++) (*ia)[i * bs + j] = (tia[i + 1] - tia[i]) * bs + (*ia)[i * bs + j - 1];
14838f7157efSSatish Balay     }
1484ad540459SPierre Jolivet     if (n) (*ia)[n * bs] = (tia[n] - tia[n - 1]) * bs + (*ia)[n * bs - 1];
1485ecc77c7aSBarry Smith 
14861a83f524SJed Brown     if (inja) {
14879566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz * bs * bs, ja));
14889985e31cSBarry Smith       cnt = 0;
14899985e31cSBarry Smith       for (i = 0; i < n; i++) {
14909985e31cSBarry Smith         for (j = 0; j < bs; j++) {
14919985e31cSBarry Smith           for (k = tia[i]; k < tia[i + 1]; k++) {
1492ad540459SPierre Jolivet             for (l = 0; l < bs; l++) (*ja)[cnt++] = bs * tja[k] + l;
14939985e31cSBarry Smith           }
14949985e31cSBarry Smith         }
14959985e31cSBarry Smith       }
14969985e31cSBarry Smith     }
14979985e31cSBarry Smith 
14988f7157efSSatish Balay     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
14999566063dSJacob Faibussowitsch       PetscCall(PetscFree(tia));
15009566063dSJacob Faibussowitsch       PetscCall(PetscFree(tja));
15018f7157efSSatish Balay     }
1502f6d58c54SBarry Smith   } else if (oshift == 1) {
1503715a17b5SBarry Smith     if (symmetric) {
1504a2ea699eSBarry Smith       nz = tia[A->rmap->n / bs];
1505715a17b5SBarry Smith       /*  add 1 to i and j indices */
1506715a17b5SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) tia[i] = tia[i] + 1;
1507715a17b5SBarry Smith       *ia = tia;
1508715a17b5SBarry Smith       if (ja) {
1509715a17b5SBarry Smith         for (i = 0; i < nz; i++) tja[i] = tja[i] + 1;
1510715a17b5SBarry Smith         *ja = tja;
1511715a17b5SBarry Smith       }
1512715a17b5SBarry Smith     } else {
1513a2ea699eSBarry Smith       nz = a->i[A->rmap->n / bs];
1514f6d58c54SBarry Smith       /* malloc space and  add 1 to i and j indices */
15159566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(A->rmap->n / bs + 1, ia));
1516f6d58c54SBarry Smith       for (i = 0; i < A->rmap->n / bs + 1; i++) (*ia)[i] = a->i[i] + 1;
1517f6d58c54SBarry Smith       if (ja) {
15189566063dSJacob Faibussowitsch         PetscCall(PetscMalloc1(nz, ja));
1519f6d58c54SBarry Smith         for (i = 0; i < nz; i++) (*ja)[i] = a->j[i] + 1;
1520f6d58c54SBarry Smith       }
1521715a17b5SBarry Smith     }
15228f7157efSSatish Balay   } else {
15238f7157efSSatish Balay     *ia = tia;
1524ecc77c7aSBarry Smith     if (ja) *ja = tja;
15258f7157efSSatish Balay   }
15263a40ed3dSBarry Smith   PetscFunctionReturn(0);
15273b2fbd54SBarry Smith }
15283b2fbd54SBarry Smith 
1529d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool blockcompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
1530d71ae5a4SJacob Faibussowitsch {
15313a40ed3dSBarry Smith   PetscFunctionBegin;
15323a40ed3dSBarry Smith   if (!ia) PetscFunctionReturn(0);
1533715a17b5SBarry Smith   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
15349566063dSJacob Faibussowitsch     PetscCall(PetscFree(*ia));
15359566063dSJacob Faibussowitsch     if (ja) PetscCall(PetscFree(*ja));
15363b2fbd54SBarry Smith   }
15373a40ed3dSBarry Smith   PetscFunctionReturn(0);
15383b2fbd54SBarry Smith }
15393b2fbd54SBarry Smith 
1540d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1541d71ae5a4SJacob Faibussowitsch {
15422d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15432d61bbb3SSatish Balay 
1544433994e6SBarry Smith   PetscFunctionBegin;
1545aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1546c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)A, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT ", NZ=%" PetscInt_FMT, A->rmap->N, A->cmap->n, a->nz);
15472d61bbb3SSatish Balay #endif
15489566063dSJacob Faibussowitsch   PetscCall(MatSeqXAIJFreeAIJ(A, &a->a, &a->j, &a->i));
15499566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
15509566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
15519566063dSJacob Faibussowitsch   if (a->free_diag) PetscCall(PetscFree(a->diag));
15529566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->idiag));
15539566063dSJacob Faibussowitsch   if (a->free_imax_ilen) PetscCall(PetscFree2(a->imax, a->ilen));
15549566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->solve_work));
15559566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->mult_work));
15569566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_workt));
15579566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->sor_work));
15589566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
15599566063dSJacob Faibussowitsch   PetscCall(PetscFree(a->saved_values));
15609566063dSJacob Faibussowitsch   PetscCall(PetscFree2(a->compressedrow.i, a->compressedrow.rindex));
1561c4319e64SHong Zhang 
15629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->sbaijMat));
15639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&a->parent));
15649566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
1565901853e0SKris Buschelman 
15669566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
15679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJGetArray_C", NULL));
15689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJRestoreArray_C", NULL));
15699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatStoreValues_C", NULL));
15709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatRetrieveValues_C", NULL));
15719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetColumnIndices_C", NULL));
15729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqaij_C", NULL));
15739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqsbaij_C", NULL));
15749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocation_C", NULL));
15759566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSeqBAIJSetPreallocationCSR_C", NULL));
15769566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_seqbstrm_C", NULL));
15779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatIsTranspose_C", NULL));
15787ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
15799566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_hypre_C", NULL));
15807ea3e4caSstefano_zampini #endif
15819566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqbaij_is_C", NULL));
15822e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
15832d61bbb3SSatish Balay   PetscFunctionReturn(0);
15842d61bbb3SSatish Balay }
15852d61bbb3SSatish Balay 
1586d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_SeqBAIJ(Mat A, MatOption op, PetscBool flg)
1587d71ae5a4SJacob Faibussowitsch {
15882d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
15892d61bbb3SSatish Balay 
15902d61bbb3SSatish Balay   PetscFunctionBegin;
1591aa275fccSKris Buschelman   switch (op) {
1592d71ae5a4SJacob Faibussowitsch   case MAT_ROW_ORIENTED:
1593d71ae5a4SJacob Faibussowitsch     a->roworiented = flg;
1594d71ae5a4SJacob Faibussowitsch     break;
1595d71ae5a4SJacob Faibussowitsch   case MAT_KEEP_NONZERO_PATTERN:
1596d71ae5a4SJacob Faibussowitsch     a->keepnonzeropattern = flg;
1597d71ae5a4SJacob Faibussowitsch     break;
1598d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATIONS:
1599d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? 0 : 1);
1600d71ae5a4SJacob Faibussowitsch     break;
1601d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_LOCATION_ERR:
1602d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -1 : 0);
1603d71ae5a4SJacob Faibussowitsch     break;
1604d71ae5a4SJacob Faibussowitsch   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1605d71ae5a4SJacob Faibussowitsch     a->nonew = (flg ? -2 : 0);
1606d71ae5a4SJacob Faibussowitsch     break;
1607d71ae5a4SJacob Faibussowitsch   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1608d71ae5a4SJacob Faibussowitsch     a->nounused = (flg ? -1 : 0);
1609d71ae5a4SJacob Faibussowitsch     break;
16108c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1611aa275fccSKris Buschelman   case MAT_IGNORE_OFF_PROC_ENTRIES:
1612aa275fccSKris Buschelman   case MAT_USE_HASH_TABLE:
1613d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1614d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1615d71ae5a4SJacob Faibussowitsch     break;
16165021d80fSJed Brown   case MAT_SPD:
161777e54ba9SKris Buschelman   case MAT_SYMMETRIC:
161877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
16199a4540c5SBarry Smith   case MAT_HERMITIAN:
16209a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1621b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1622c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1623672ba085SHong Zhang   case MAT_STRUCTURE_ONLY:
1624b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1625b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
162677e54ba9SKris Buschelman     break;
1627d71ae5a4SJacob Faibussowitsch   default:
1628d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
16292d61bbb3SSatish Balay   }
16302d61bbb3SSatish Balay   PetscFunctionReturn(0);
16312d61bbb3SSatish Balay }
16322d61bbb3SSatish Balay 
163352768537SHong Zhang /* used for both SeqBAIJ and SeqSBAIJ matrices */
1634d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v, PetscInt *ai, PetscInt *aj, PetscScalar *aa)
1635d71ae5a4SJacob Faibussowitsch {
163652768537SHong Zhang   PetscInt     itmp, i, j, k, M, bn, bp, *idx_i, bs, bs2;
163752768537SHong Zhang   MatScalar   *aa_i;
163887828ca2SBarry Smith   PetscScalar *v_i;
16392d61bbb3SSatish Balay 
16402d61bbb3SSatish Balay   PetscFunctionBegin;
1641d0f46423SBarry Smith   bs  = A->rmap->bs;
164252768537SHong Zhang   bs2 = bs * bs;
16435f80ce2aSJacob Faibussowitsch   PetscCheck(row >= 0 && row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);
16442d61bbb3SSatish Balay 
16452d61bbb3SSatish Balay   bn  = row / bs; /* Block number */
16462d61bbb3SSatish Balay   bp  = row % bs; /* Block Position */
16472d61bbb3SSatish Balay   M   = ai[bn + 1] - ai[bn];
16482d61bbb3SSatish Balay   *nz = bs * M;
16492d61bbb3SSatish Balay 
16502d61bbb3SSatish Balay   if (v) {
1651f4259b30SLisandro Dalcin     *v = NULL;
16522d61bbb3SSatish Balay     if (*nz) {
16539566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, v));
16542d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16552d61bbb3SSatish Balay         v_i  = *v + i * bs;
16562d61bbb3SSatish Balay         aa_i = aa + bs2 * (ai[bn] + i);
165726fbe8dcSKarl Rupp         for (j = bp, k = 0; j < bs2; j += bs, k++) v_i[k] = aa_i[j];
16582d61bbb3SSatish Balay       }
16592d61bbb3SSatish Balay     }
16602d61bbb3SSatish Balay   }
16612d61bbb3SSatish Balay 
16622d61bbb3SSatish Balay   if (idx) {
1663f4259b30SLisandro Dalcin     *idx = NULL;
16642d61bbb3SSatish Balay     if (*nz) {
16659566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(*nz, idx));
16662d61bbb3SSatish Balay       for (i = 0; i < M; i++) { /* for each block in the block row */
16672d61bbb3SSatish Balay         idx_i = *idx + i * bs;
16682d61bbb3SSatish Balay         itmp  = bs * aj[ai[bn] + i];
166926fbe8dcSKarl Rupp         for (j = 0; j < bs; j++) idx_i[j] = itmp++;
16702d61bbb3SSatish Balay       }
16712d61bbb3SSatish Balay     }
16722d61bbb3SSatish Balay   }
16732d61bbb3SSatish Balay   PetscFunctionReturn(0);
16742d61bbb3SSatish Balay }
16752d61bbb3SSatish Balay 
1676d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1677d71ae5a4SJacob Faibussowitsch {
167852768537SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
167952768537SHong Zhang 
168052768537SHong Zhang   PetscFunctionBegin;
16819566063dSJacob Faibussowitsch   PetscCall(MatGetRow_SeqBAIJ_private(A, row, nz, idx, v, a->i, a->j, a->a));
168252768537SHong Zhang   PetscFunctionReturn(0);
168352768537SHong Zhang }
168452768537SHong Zhang 
1685d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1686d71ae5a4SJacob Faibussowitsch {
16872d61bbb3SSatish Balay   PetscFunctionBegin;
1688cb4a9cd9SHong Zhang   if (nz) *nz = 0;
16899566063dSJacob Faibussowitsch   if (idx) PetscCall(PetscFree(*idx));
16909566063dSJacob Faibussowitsch   if (v) PetscCall(PetscFree(*v));
16912d61bbb3SSatish Balay   PetscFunctionReturn(0);
16922d61bbb3SSatish Balay }
16932d61bbb3SSatish Balay 
1694d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_SeqBAIJ(Mat A, MatReuse reuse, Mat *B)
1695d71ae5a4SJacob Faibussowitsch {
169620e84f26SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *at;
16972d61bbb3SSatish Balay   Mat          C;
169820e84f26SHong Zhang   PetscInt     i, j, k, *aj = a->j, *ai = a->i, bs = A->rmap->bs, mbs = a->mbs, nbs = a->nbs, *atfill;
169920e84f26SHong Zhang   PetscInt     bs2 = a->bs2, *ati, *atj, anzj, kr;
170020e84f26SHong Zhang   MatScalar   *ata, *aa = a->a;
17012d61bbb3SSatish Balay 
17022d61bbb3SSatish Balay   PetscFunctionBegin;
17037fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
17049566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(1 + nbs, &atfill));
1705cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
170620e84f26SHong Zhang     for (i = 0; i < ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
17072d61bbb3SSatish Balay 
17089566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &C));
17099566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(C, A->cmap->n, A->rmap->N, A->cmap->n, A->rmap->N));
17109566063dSJacob Faibussowitsch     PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
17119566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(C, bs, 0, atfill));
171220e84f26SHong Zhang 
171320e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171420e84f26SHong Zhang     ati = at->i;
171520e84f26SHong Zhang     for (i = 0; i < nbs; i++) at->ilen[i] = at->imax[i] = ati[i + 1] - ati[i];
1716fc4dec0aSBarry Smith   } else {
1717fc4dec0aSBarry Smith     C   = *B;
171820e84f26SHong Zhang     at  = (Mat_SeqBAIJ *)C->data;
171920e84f26SHong Zhang     ati = at->i;
1720fc4dec0aSBarry Smith   }
1721fc4dec0aSBarry Smith 
172220e84f26SHong Zhang   atj = at->j;
172320e84f26SHong Zhang   ata = at->a;
172420e84f26SHong Zhang 
172520e84f26SHong Zhang   /* Copy ati into atfill so we have locations of the next free space in atj */
17269566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(atfill, ati, nbs));
172720e84f26SHong Zhang 
172820e84f26SHong Zhang   /* Walk through A row-wise and mark nonzero entries of A^T. */
17292d61bbb3SSatish Balay   for (i = 0; i < mbs; i++) {
173020e84f26SHong Zhang     anzj = ai[i + 1] - ai[i];
173120e84f26SHong Zhang     for (j = 0; j < anzj; j++) {
173220e84f26SHong Zhang       atj[atfill[*aj]] = i;
173320e84f26SHong Zhang       for (kr = 0; kr < bs; kr++) {
1734ad540459SPierre Jolivet         for (k = 0; k < bs; k++) ata[bs2 * atfill[*aj] + k * bs + kr] = *aa++;
17352d61bbb3SSatish Balay       }
173620e84f26SHong Zhang       atfill[*aj++] += 1;
173720e84f26SHong Zhang     }
173820e84f26SHong Zhang   }
17399566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
17409566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
17412d61bbb3SSatish Balay 
174220e84f26SHong Zhang   /* Clean up temporary space and complete requests. */
17439566063dSJacob Faibussowitsch   PetscCall(PetscFree(atfill));
174420e84f26SHong Zhang 
1745cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
17469566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(C, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
17472d61bbb3SSatish Balay     *B = C;
17482d61bbb3SSatish Balay   } else {
17499566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(A, &C));
17502d61bbb3SSatish Balay   }
17512d61bbb3SSatish Balay   PetscFunctionReturn(0);
17522d61bbb3SSatish Balay }
17532d61bbb3SSatish Balay 
1754d71ae5a4SJacob Faibussowitsch PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A, Mat B, PetscReal tol, PetscBool *f)
1755d71ae5a4SJacob Faibussowitsch {
1756453d3561SHong Zhang   Mat Btrans;
1757453d3561SHong Zhang 
1758453d3561SHong Zhang   PetscFunctionBegin;
1759453d3561SHong Zhang   *f = PETSC_FALSE;
1760acd337a6SBarry Smith   PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &Btrans));
17619566063dSJacob Faibussowitsch   PetscCall(MatEqual_SeqBAIJ(B, Btrans, f));
17629566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&Btrans));
1763453d3561SHong Zhang   PetscFunctionReturn(0);
1764453d3561SHong Zhang }
1765453d3561SHong Zhang 
1766618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1767d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
1768d71ae5a4SJacob Faibussowitsch {
1769b51a4376SLisandro Dalcin   Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)mat->data;
1770b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, m, bs, nz, cnt, i, j, k, l;
1771b51a4376SLisandro Dalcin   PetscInt    *rowlens, *colidxs;
1772b51a4376SLisandro Dalcin   PetscScalar *matvals;
17732593348eSBarry Smith 
17743a40ed3dSBarry Smith   PetscFunctionBegin;
17759566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
17763b2fbd54SBarry Smith 
1777b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1778b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1779b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1780b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1781b51a4376SLisandro Dalcin   nz = bs * bs * A->nz;
17822593348eSBarry Smith 
1783b51a4376SLisandro Dalcin   /* write matrix header */
1784b51a4376SLisandro Dalcin   header[0] = MAT_FILE_CLASSID;
17859371c9d4SSatish Balay   header[1] = M;
17869371c9d4SSatish Balay   header[2] = N;
17879371c9d4SSatish Balay   header[3] = nz;
17889566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
17892593348eSBarry Smith 
1790b51a4376SLisandro Dalcin   /* store row lengths */
17919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1792b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
17939371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i]);
17949566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, rowlens, m, PETSC_INT));
17959566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1796b51a4376SLisandro Dalcin 
1797b51a4376SLisandro Dalcin   /* store column indices  */
17989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1799b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1800b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1801b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18029371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[j] + l;
18035f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18049566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, colidxs, nz, PETSC_INT));
18059566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
18062593348eSBarry Smith 
18072593348eSBarry Smith   /* store nonzero values */
18089566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1809b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
1810b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++)
1811b51a4376SLisandro Dalcin       for (j = A->i[i]; j < A->i[i + 1]; j++)
18129371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * j + l) + k];
18135f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
18149566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, matvals, nz, PETSC_SCALAR));
18159566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1816ce6f0cecSBarry Smith 
1817b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
18189566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
18193a40ed3dSBarry Smith   PetscFunctionReturn(0);
18202593348eSBarry Smith }
18212593348eSBarry Smith 
1822d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A, PetscViewer viewer)
1823d71ae5a4SJacob Faibussowitsch {
18247dc0baabSHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
18257dc0baabSHong Zhang   PetscInt     i, bs = A->rmap->bs, k;
18267dc0baabSHong Zhang 
18277dc0baabSHong Zhang   PetscFunctionBegin;
18289566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
18297dc0baabSHong Zhang   for (i = 0; i < a->mbs; i++) {
18309566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT "-%" PetscInt_FMT ":", i * bs, i * bs + bs - 1));
183148a46eb9SPierre Jolivet     for (k = a->i[i]; k < a->i[i + 1]; k++) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT "-%" PetscInt_FMT ") ", bs * a->j[k], bs * a->j[k] + bs - 1));
18329566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
18337dc0baabSHong Zhang   }
18349566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18357dc0baabSHong Zhang   PetscFunctionReturn(0);
18367dc0baabSHong Zhang }
18377dc0baabSHong Zhang 
1838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A, PetscViewer viewer)
1839d71ae5a4SJacob Faibussowitsch {
1840b6490206SBarry Smith   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1841d0f46423SBarry Smith   PetscInt          i, j, bs = A->rmap->bs, k, l, bs2 = a->bs2;
1842f3ef73ceSBarry Smith   PetscViewerFormat format;
18432593348eSBarry Smith 
18443a40ed3dSBarry Smith   PetscFunctionBegin;
18457dc0baabSHong Zhang   if (A->structure_only) {
18469566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII_structonly(A, viewer));
18477dc0baabSHong Zhang     PetscFunctionReturn(0);
18487dc0baabSHong Zhang   }
18497dc0baabSHong Zhang 
18509566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
1851456192e2SBarry Smith   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
18529566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
1853fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1854ade3a672SBarry Smith     const char *matname;
1855bcd9e38bSBarry Smith     Mat         aij;
18569566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
18579566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)A, &matname));
18589566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)aij, matname));
18599566063dSJacob Faibussowitsch     PetscCall(MatView(aij, viewer));
18609566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&aij));
186104929863SHong Zhang   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
186204929863SHong Zhang     PetscFunctionReturn(0);
1863fb9695e5SSatish Balay   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
18649566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
186544cd7ae7SLois Curfman McInnes     for (i = 0; i < a->mbs; i++) {
186644cd7ae7SLois Curfman McInnes       for (j = 0; j < bs; j++) {
18679566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
186844cd7ae7SLois Curfman McInnes         for (k = a->i[i]; k < a->i[i + 1]; k++) {
186944cd7ae7SLois Curfman McInnes           for (l = 0; l < bs; l++) {
1870aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18710e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18729371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18730e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0 && PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18749371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %gi) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18750e6d2581SBarry Smith             } else if (PetscRealPart(a->a[bs2 * k + l * bs + j]) != 0.0) {
18769566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
18770ef38995SBarry Smith             }
187844cd7ae7SLois Curfman McInnes #else
187948a46eb9SPierre Jolivet             if (a->a[bs2 * k + l * bs + j] != 0.0) PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
188044cd7ae7SLois Curfman McInnes #endif
188144cd7ae7SLois Curfman McInnes           }
188244cd7ae7SLois Curfman McInnes         }
18839566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
188444cd7ae7SLois Curfman McInnes       }
188544cd7ae7SLois Curfman McInnes     }
18869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
18870ef38995SBarry Smith   } else {
18889566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_FALSE));
1889b6490206SBarry Smith     for (i = 0; i < a->mbs; i++) {
1890b6490206SBarry Smith       for (j = 0; j < bs; j++) {
18919566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ":", i * bs + j));
1892b6490206SBarry Smith         for (k = a->i[i]; k < a->i[i + 1]; k++) {
1893b6490206SBarry Smith           for (l = 0; l < bs; l++) {
1894aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX)
18950e6d2581SBarry Smith             if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) > 0.0) {
18969371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g + %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), (double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18970e6d2581SBarry Smith             } else if (PetscImaginaryPart(a->a[bs2 * k + l * bs + j]) < 0.0) {
18989371c9d4SSatish Balay               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g - %g i) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j]), -(double)PetscImaginaryPart(a->a[bs2 * k + l * bs + j])));
18990ef38995SBarry Smith             } else {
19009566063dSJacob Faibussowitsch               PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)PetscRealPart(a->a[bs2 * k + l * bs + j])));
190188685aaeSLois Curfman McInnes             }
190288685aaeSLois Curfman McInnes #else
19039566063dSJacob Faibussowitsch             PetscCall(PetscViewerASCIIPrintf(viewer, " (%" PetscInt_FMT ", %g) ", bs * a->j[k] + l, (double)a->a[bs2 * k + l * bs + j]));
190488685aaeSLois Curfman McInnes #endif
19052593348eSBarry Smith           }
19062593348eSBarry Smith         }
19079566063dSJacob Faibussowitsch         PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
19082593348eSBarry Smith       }
19092593348eSBarry Smith     }
19109566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIUseTabs(viewer, PETSC_TRUE));
1911b6490206SBarry Smith   }
19129566063dSJacob Faibussowitsch   PetscCall(PetscViewerFlush(viewer));
19133a40ed3dSBarry Smith   PetscFunctionReturn(0);
19142593348eSBarry Smith }
19152593348eSBarry Smith 
19169804daf3SBarry Smith #include <petscdraw.h>
1917d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw, void *Aa)
1918d71ae5a4SJacob Faibussowitsch {
191977ed5343SBarry Smith   Mat               A = (Mat)Aa;
19203270192aSSatish Balay   Mat_SeqBAIJ      *a = (Mat_SeqBAIJ *)A->data;
1921d0f46423SBarry Smith   PetscInt          row, i, j, k, l, mbs = a->mbs, color, bs = A->rmap->bs, bs2 = a->bs2;
19220e6d2581SBarry Smith   PetscReal         xl, yl, xr, yr, x_l, x_r, y_l, y_r;
19233f1db9ecSBarry Smith   MatScalar        *aa;
1924b0a32e0cSBarry Smith   PetscViewer       viewer;
1925b3e7f47fSJed Brown   PetscViewerFormat format;
19263270192aSSatish Balay 
19273a40ed3dSBarry Smith   PetscFunctionBegin;
19289566063dSJacob Faibussowitsch   PetscCall(PetscObjectQuery((PetscObject)A, "Zoomviewer", (PetscObject *)&viewer));
19299566063dSJacob Faibussowitsch   PetscCall(PetscViewerGetFormat(viewer, &format));
19309566063dSJacob Faibussowitsch   PetscCall(PetscDrawGetCoordinates(draw, &xl, &yl, &xr, &yr));
193177ed5343SBarry Smith 
19323270192aSSatish Balay   /* loop over matrix elements drawing boxes */
1933b3e7f47fSJed Brown 
1934b3e7f47fSJed Brown   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1935d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
1936383922c3SLisandro Dalcin     /* Blue for negative, Cyan for zero and  Red for positive */
1937b0a32e0cSBarry Smith     color = PETSC_DRAW_BLUE;
19383270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19393270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19409371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19419371c9d4SSatish Balay         y_r = y_l + 1.0;
19429371c9d4SSatish Balay         x_l = a->j[j] * bs;
19439371c9d4SSatish Balay         x_r = x_l + 1.0;
19443270192aSSatish Balay         aa  = a->a + j * bs2;
19453270192aSSatish Balay         for (k = 0; k < bs; k++) {
19463270192aSSatish Balay           for (l = 0; l < bs; l++) {
19470e6d2581SBarry Smith             if (PetscRealPart(*aa++) >= 0.) continue;
19489566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19493270192aSSatish Balay           }
19503270192aSSatish Balay         }
19513270192aSSatish Balay       }
19523270192aSSatish Balay     }
1953b0a32e0cSBarry Smith     color = PETSC_DRAW_CYAN;
19543270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19553270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19569371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19579371c9d4SSatish Balay         y_r = y_l + 1.0;
19589371c9d4SSatish Balay         x_l = a->j[j] * bs;
19599371c9d4SSatish Balay         x_r = x_l + 1.0;
19603270192aSSatish Balay         aa  = a->a + j * bs2;
19613270192aSSatish Balay         for (k = 0; k < bs; k++) {
19623270192aSSatish Balay           for (l = 0; l < bs; l++) {
19630e6d2581SBarry Smith             if (PetscRealPart(*aa++) != 0.) continue;
19649566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19653270192aSSatish Balay           }
19663270192aSSatish Balay         }
19673270192aSSatish Balay       }
19683270192aSSatish Balay     }
1969b0a32e0cSBarry Smith     color = PETSC_DRAW_RED;
19703270192aSSatish Balay     for (i = 0, row = 0; i < mbs; i++, row += bs) {
19713270192aSSatish Balay       for (j = a->i[i]; j < a->i[i + 1]; j++) {
19729371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
19739371c9d4SSatish Balay         y_r = y_l + 1.0;
19749371c9d4SSatish Balay         x_l = a->j[j] * bs;
19759371c9d4SSatish Balay         x_r = x_l + 1.0;
19763270192aSSatish Balay         aa  = a->a + j * bs2;
19773270192aSSatish Balay         for (k = 0; k < bs; k++) {
19783270192aSSatish Balay           for (l = 0; l < bs; l++) {
19790e6d2581SBarry Smith             if (PetscRealPart(*aa++) <= 0.) continue;
19809566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
19813270192aSSatish Balay           }
19823270192aSSatish Balay         }
19833270192aSSatish Balay       }
19843270192aSSatish Balay     }
1985d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
1986b3e7f47fSJed Brown   } else {
1987b3e7f47fSJed Brown     /* use contour shading to indicate magnitude of values */
1988b3e7f47fSJed Brown     /* first determine max of all nonzero values */
1989b05fc000SLisandro Dalcin     PetscReal minv = 0.0, maxv = 0.0;
1990b3e7f47fSJed Brown     PetscDraw popup;
1991b3e7f47fSJed Brown 
1992b3e7f47fSJed Brown     for (i = 0; i < a->nz * a->bs2; i++) {
1993b3e7f47fSJed Brown       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1994b3e7f47fSJed Brown     }
1995383922c3SLisandro Dalcin     if (minv >= maxv) maxv = minv + PETSC_SMALL;
19969566063dSJacob Faibussowitsch     PetscCall(PetscDrawGetPopup(draw, &popup));
19979566063dSJacob Faibussowitsch     PetscCall(PetscDrawScalePopup(popup, 0.0, maxv));
1998383922c3SLisandro Dalcin 
1999d0609cedSBarry Smith     PetscDrawCollectiveBegin(draw);
2000b3e7f47fSJed Brown     for (i = 0, row = 0; i < mbs; i++, row += bs) {
2001b3e7f47fSJed Brown       for (j = a->i[i]; j < a->i[i + 1]; j++) {
20029371c9d4SSatish Balay         y_l = A->rmap->N - row - 1.0;
20039371c9d4SSatish Balay         y_r = y_l + 1.0;
20049371c9d4SSatish Balay         x_l = a->j[j] * bs;
20059371c9d4SSatish Balay         x_r = x_l + 1.0;
2006b3e7f47fSJed Brown         aa  = a->a + j * bs2;
2007b3e7f47fSJed Brown         for (k = 0; k < bs; k++) {
2008b3e7f47fSJed Brown           for (l = 0; l < bs; l++) {
2009383922c3SLisandro Dalcin             MatScalar v = *aa++;
2010383922c3SLisandro Dalcin             color       = PetscDrawRealToColor(PetscAbsScalar(v), minv, maxv);
20119566063dSJacob Faibussowitsch             PetscCall(PetscDrawRectangle(draw, x_l + k, y_l - l, x_r + k, y_r - l, color, color, color, color));
2012b3e7f47fSJed Brown           }
2013b3e7f47fSJed Brown         }
2014b3e7f47fSJed Brown       }
2015b3e7f47fSJed Brown     }
2016d0609cedSBarry Smith     PetscDrawCollectiveEnd(draw);
2017b3e7f47fSJed Brown   }
201877ed5343SBarry Smith   PetscFunctionReturn(0);
201977ed5343SBarry Smith }
20203270192aSSatish Balay 
2021d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A, PetscViewer viewer)
2022d71ae5a4SJacob Faibussowitsch {
20230e6d2581SBarry Smith   PetscReal xl, yl, xr, yr, w, h;
2024b0a32e0cSBarry Smith   PetscDraw draw;
2025ace3abfcSBarry Smith   PetscBool isnull;
20263270192aSSatish Balay 
202777ed5343SBarry Smith   PetscFunctionBegin;
20289566063dSJacob Faibussowitsch   PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
20299566063dSJacob Faibussowitsch   PetscCall(PetscDrawIsNull(draw, &isnull));
203045f3bb6eSLisandro Dalcin   if (isnull) PetscFunctionReturn(0);
203177ed5343SBarry Smith 
20329371c9d4SSatish Balay   xr = A->cmap->n;
20339371c9d4SSatish Balay   yr = A->rmap->N;
20349371c9d4SSatish Balay   h  = yr / 10.0;
20359371c9d4SSatish Balay   w  = xr / 10.0;
20369371c9d4SSatish Balay   xr += w;
20379371c9d4SSatish Balay   yr += h;
20389371c9d4SSatish Balay   xl = -w;
20399371c9d4SSatish Balay   yl = -h;
20409566063dSJacob Faibussowitsch   PetscCall(PetscDrawSetCoordinates(draw, xl, yl, xr, yr));
20419566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", (PetscObject)viewer));
20429566063dSJacob Faibussowitsch   PetscCall(PetscDrawZoom(draw, MatView_SeqBAIJ_Draw_Zoom, A));
20439566063dSJacob Faibussowitsch   PetscCall(PetscObjectCompose((PetscObject)A, "Zoomviewer", NULL));
20449566063dSJacob Faibussowitsch   PetscCall(PetscDrawSave(draw));
20453a40ed3dSBarry Smith   PetscFunctionReturn(0);
20463270192aSSatish Balay }
20473270192aSSatish Balay 
2048d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_SeqBAIJ(Mat A, PetscViewer viewer)
2049d71ae5a4SJacob Faibussowitsch {
2050ace3abfcSBarry Smith   PetscBool iascii, isbinary, isdraw;
20512593348eSBarry Smith 
20523a40ed3dSBarry Smith   PetscFunctionBegin;
20539566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
20549566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
20559566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
205632077d6dSBarry Smith   if (iascii) {
20579566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_ASCII(A, viewer));
20580f5bd95cSBarry Smith   } else if (isbinary) {
20599566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Binary(A, viewer));
20600f5bd95cSBarry Smith   } else if (isdraw) {
20619566063dSJacob Faibussowitsch     PetscCall(MatView_SeqBAIJ_Draw(A, viewer));
20625cd90555SBarry Smith   } else {
2063a5e6ed63SBarry Smith     Mat B;
20649566063dSJacob Faibussowitsch     PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
20659566063dSJacob Faibussowitsch     PetscCall(MatView(B, viewer));
20669566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&B));
20672593348eSBarry Smith   }
20683a40ed3dSBarry Smith   PetscFunctionReturn(0);
20692593348eSBarry Smith }
2070b6490206SBarry Smith 
2071d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], PetscScalar v[])
2072d71ae5a4SJacob Faibussowitsch {
2073cd0e1443SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2074c1ac3661SBarry Smith   PetscInt    *rp, k, low, high, t, row, nrow, i, col, l, *aj = a->j;
2075c1ac3661SBarry Smith   PetscInt    *ai = a->i, *ailen = a->ilen;
2076d0f46423SBarry Smith   PetscInt     brow, bcol, ridx, cidx, bs = A->rmap->bs, bs2 = a->bs2;
207797e567efSBarry Smith   MatScalar   *ap, *aa = a->a;
2078cd0e1443SSatish Balay 
20793a40ed3dSBarry Smith   PetscFunctionBegin;
20802d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over rows */
20819371c9d4SSatish Balay     row  = im[k];
20829371c9d4SSatish Balay     brow = row / bs;
20839371c9d4SSatish Balay     if (row < 0) {
20849371c9d4SSatish Balay       v += n;
20859371c9d4SSatish Balay       continue;
20869371c9d4SSatish Balay     } /* negative row */
208754c59aa7SJacob Faibussowitsch     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " too large", row);
2088d29f2997SMatthew Woehlke     rp   = aj ? aj + ai[brow] : NULL;       /* mustn't add to NULL, that is UB */
2089d29f2997SMatthew Woehlke     ap   = aa ? aa + bs2 * ai[brow] : NULL; /* mustn't add to NULL, that is UB */
20902c3acbe9SBarry Smith     nrow = ailen[brow];
20912d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over columns */
20929371c9d4SSatish Balay       if (in[l] < 0) {
20939371c9d4SSatish Balay         v++;
20949371c9d4SSatish Balay         continue;
20959371c9d4SSatish Balay       } /* negative column */
209654c59aa7SJacob Faibussowitsch       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column %" PetscInt_FMT " too large", in[l]);
20972d61bbb3SSatish Balay       col  = in[l];
20982d61bbb3SSatish Balay       bcol = col / bs;
20992d61bbb3SSatish Balay       cidx = col % bs;
21002d61bbb3SSatish Balay       ridx = row % bs;
21012d61bbb3SSatish Balay       high = nrow;
21022d61bbb3SSatish Balay       low  = 0; /* assume unsorted */
21032d61bbb3SSatish Balay       while (high - low > 5) {
2104cd0e1443SSatish Balay         t = (low + high) / 2;
2105cd0e1443SSatish Balay         if (rp[t] > bcol) high = t;
2106cd0e1443SSatish Balay         else low = t;
2107cd0e1443SSatish Balay       }
2108cd0e1443SSatish Balay       for (i = low; i < high; i++) {
2109cd0e1443SSatish Balay         if (rp[i] > bcol) break;
2110cd0e1443SSatish Balay         if (rp[i] == bcol) {
21112d61bbb3SSatish Balay           *v++ = ap[bs2 * i + bs * cidx + ridx];
21122d61bbb3SSatish Balay           goto finished;
2113cd0e1443SSatish Balay         }
2114cd0e1443SSatish Balay       }
211597e567efSBarry Smith       *v++ = 0.0;
21162d61bbb3SSatish Balay     finished:;
2117cd0e1443SSatish Balay     }
2118cd0e1443SSatish Balay   }
21193a40ed3dSBarry Smith   PetscFunctionReturn(0);
2120cd0e1443SSatish Balay }
2121cd0e1443SSatish Balay 
2122d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2123d71ae5a4SJacob Faibussowitsch {
212492c4ed94SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2125e2ee6c50SBarry Smith   PetscInt          *rp, k, low, high, t, ii, jj, row, nrow, i, col, l, rmax, N, lastcol = -1;
2126c1ac3661SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2127d0f46423SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs, stepval;
2128ace3abfcSBarry Smith   PetscBool          roworiented = a->roworiented;
2129dd6ea824SBarry Smith   const PetscScalar *value       = v;
21309d243f67SHong Zhang   MatScalar         *ap = NULL, *aa = a->a, *bap;
213192c4ed94SBarry Smith 
21323a40ed3dSBarry Smith   PetscFunctionBegin;
21330e324ae4SSatish Balay   if (roworiented) {
21340e324ae4SSatish Balay     stepval = (n - 1) * bs;
21350e324ae4SSatish Balay   } else {
21360e324ae4SSatish Balay     stepval = (m - 1) * bs;
21370e324ae4SSatish Balay   }
213892c4ed94SBarry Smith   for (k = 0; k < m; k++) { /* loop over added rows */
213992c4ed94SBarry Smith     row = im[k];
21405ef9f2a5SBarry Smith     if (row < 0) continue;
21416bdcaf15SBarry Smith     PetscCheck(row < a->mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block row index too large %" PetscInt_FMT " max %" PetscInt_FMT, row, a->mbs - 1);
214292c4ed94SBarry Smith     rp = aj + ai[row];
21437dc0baabSHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[row];
214492c4ed94SBarry Smith     rmax = imax[row];
214592c4ed94SBarry Smith     nrow = ailen[row];
214692c4ed94SBarry Smith     low  = 0;
2147c71e6ed7SBarry Smith     high = nrow;
214892c4ed94SBarry Smith     for (l = 0; l < n; l++) { /* loop over added columns */
21495ef9f2a5SBarry Smith       if (in[l] < 0) continue;
21506bdcaf15SBarry Smith       PetscCheck(in[l] < a->nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block column index too large %" PetscInt_FMT " max %" PetscInt_FMT, in[l], a->nbs - 1);
215192c4ed94SBarry Smith       col = in[l];
21527dc0baabSHong Zhang       if (!A->structure_only) {
215392c4ed94SBarry Smith         if (roworiented) {
215453ef36baSBarry Smith           value = v + (k * (stepval + bs) + l) * bs;
21550e324ae4SSatish Balay         } else {
215653ef36baSBarry Smith           value = v + (l * (stepval + bs) + k) * bs;
215792c4ed94SBarry Smith         }
21587dc0baabSHong Zhang       }
215926fbe8dcSKarl Rupp       if (col <= lastcol) low = 0;
216026fbe8dcSKarl Rupp       else high = nrow;
2161e2ee6c50SBarry Smith       lastcol = col;
216292c4ed94SBarry Smith       while (high - low > 7) {
216392c4ed94SBarry Smith         t = (low + high) / 2;
216492c4ed94SBarry Smith         if (rp[t] > col) high = t;
216592c4ed94SBarry Smith         else low = t;
216692c4ed94SBarry Smith       }
216792c4ed94SBarry Smith       for (i = low; i < high; i++) {
216892c4ed94SBarry Smith         if (rp[i] > col) break;
216992c4ed94SBarry Smith         if (rp[i] == col) {
21707dc0baabSHong Zhang           if (A->structure_only) goto noinsert2;
21718a84c255SSatish Balay           bap = ap + bs2 * i;
21720e324ae4SSatish Balay           if (roworiented) {
21738a84c255SSatish Balay             if (is == ADD_VALUES) {
2174dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2175ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
2176dd9472c6SBarry Smith               }
21770e324ae4SSatish Balay             } else {
2178dd9472c6SBarry Smith               for (ii = 0; ii < bs; ii++, value += stepval) {
2179ad540459SPierre Jolivet                 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2180dd9472c6SBarry Smith               }
2181dd9472c6SBarry Smith             }
21820e324ae4SSatish Balay           } else {
21830e324ae4SSatish Balay             if (is == ADD_VALUES) {
218453ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2185ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
218653ef36baSBarry Smith                 bap += bs;
2187dd9472c6SBarry Smith               }
21880e324ae4SSatish Balay             } else {
218953ef36baSBarry Smith               for (ii = 0; ii < bs; ii++, value += bs + stepval) {
2190ad540459SPierre Jolivet                 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
219153ef36baSBarry Smith                 bap += bs;
21928a84c255SSatish Balay               }
2193dd9472c6SBarry Smith             }
2194dd9472c6SBarry Smith           }
2195f1241b54SBarry Smith           goto noinsert2;
219692c4ed94SBarry Smith         }
219792c4ed94SBarry Smith       }
219889280ab3SLois Curfman McInnes       if (nonew == 1) goto noinsert2;
21995f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked index new nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
22007dc0baabSHong Zhang       if (A->structure_only) {
22017dc0baabSHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, row, col, rmax, ai, aj, rp, imax, nonew, MatScalar);
22027dc0baabSHong Zhang       } else {
2203fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
22047dc0baabSHong Zhang       }
22059371c9d4SSatish Balay       N = nrow++ - 1;
22069371c9d4SSatish Balay       high++;
220792c4ed94SBarry Smith       /* shift up all the later entries in this row */
22089566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
220992c4ed94SBarry Smith       rp[i] = col;
22107dc0baabSHong Zhang       if (!A->structure_only) {
22119566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
22128a84c255SSatish Balay         bap = ap + bs2 * i;
22130e324ae4SSatish Balay         if (roworiented) {
2214dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2215ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
2216dd9472c6SBarry Smith           }
22170e324ae4SSatish Balay         } else {
2218dd9472c6SBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
2219ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
2220dd9472c6SBarry Smith           }
2221dd9472c6SBarry Smith         }
22227dc0baabSHong Zhang       }
2223f1241b54SBarry Smith     noinsert2:;
222492c4ed94SBarry Smith       low = i;
222592c4ed94SBarry Smith     }
222692c4ed94SBarry Smith     ailen[row] = nrow;
222792c4ed94SBarry Smith   }
22283a40ed3dSBarry Smith   PetscFunctionReturn(0);
222992c4ed94SBarry Smith }
223026e093fcSHong Zhang 
2231d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A, MatAssemblyType mode)
2232d71ae5a4SJacob Faibussowitsch {
2233584200bdSSatish Balay   Mat_SeqBAIJ *a      = (Mat_SeqBAIJ *)A->data;
2234580bdb30SBarry Smith   PetscInt     fshift = 0, i, *ai = a->i, *aj = a->j, *imax = a->imax;
2235d0f46423SBarry Smith   PetscInt     m = A->rmap->N, *ip, N, *ailen = a->ilen;
2236c1ac3661SBarry Smith   PetscInt     mbs = a->mbs, bs2 = a->bs2, rmax = 0;
22373f1db9ecSBarry Smith   MatScalar   *aa    = a->a, *ap;
22383447b6efSHong Zhang   PetscReal    ratio = 0.6;
2239584200bdSSatish Balay 
22403a40ed3dSBarry Smith   PetscFunctionBegin;
22413a40ed3dSBarry Smith   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2242584200bdSSatish Balay 
224343ee02c3SBarry Smith   if (m) rmax = ailen[0];
2244584200bdSSatish Balay   for (i = 1; i < mbs; i++) {
2245584200bdSSatish Balay     /* move each row back by the amount of empty slots (fshift) before it*/
2246584200bdSSatish Balay     fshift += imax[i - 1] - ailen[i - 1];
2247d402145bSBarry Smith     rmax = PetscMax(rmax, ailen[i]);
2248584200bdSSatish Balay     if (fshift) {
2249580bdb30SBarry Smith       ip = aj + ai[i];
2250580bdb30SBarry Smith       ap = aa + bs2 * ai[i];
2251584200bdSSatish Balay       N  = ailen[i];
22529566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(ip - fshift, ip, N));
225348a46eb9SPierre Jolivet       if (!A->structure_only) PetscCall(PetscArraymove(ap - bs2 * fshift, ap, bs2 * N));
2254672ba085SHong Zhang     }
2255584200bdSSatish Balay     ai[i] = ai[i - 1] + ailen[i - 1];
2256584200bdSSatish Balay   }
2257584200bdSSatish Balay   if (mbs) {
2258584200bdSSatish Balay     fshift += imax[mbs - 1] - ailen[mbs - 1];
2259584200bdSSatish Balay     ai[mbs] = ai[mbs - 1] + ailen[mbs - 1];
2260584200bdSSatish Balay   }
22617c565772SBarry Smith 
2262584200bdSSatish Balay   /* reset ilen and imax for each row */
22637c565772SBarry Smith   a->nonzerorowcnt = 0;
2264672ba085SHong Zhang   if (A->structure_only) {
22659566063dSJacob Faibussowitsch     PetscCall(PetscFree2(a->imax, a->ilen));
2266672ba085SHong Zhang   } else { /* !A->structure_only */
2267584200bdSSatish Balay     for (i = 0; i < mbs; i++) {
2268584200bdSSatish Balay       ailen[i] = imax[i] = ai[i + 1] - ai[i];
22697c565772SBarry Smith       a->nonzerorowcnt += ((ai[i + 1] - ai[i]) > 0);
2270584200bdSSatish Balay     }
2271672ba085SHong Zhang   }
2272a7c10996SSatish Balay   a->nz = ai[mbs];
2273584200bdSSatish Balay 
2274584200bdSSatish Balay   /* diagonals may have moved, so kill the diagonal pointers */
2275b01c7715SBarry Smith   a->idiagvalid = PETSC_FALSE;
2276584200bdSSatish Balay   if (fshift && a->diag) {
22779566063dSJacob Faibussowitsch     PetscCall(PetscFree(a->diag));
2278f4259b30SLisandro Dalcin     a->diag = NULL;
2279584200bdSSatish Balay   }
22805f80ce2aSJacob Faibussowitsch   if (fshift) PetscCheck(a->nounused != -1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unused space detected in matrix: %" PetscInt_FMT " X %" PetscInt_FMT " block size %" PetscInt_FMT ", %" PetscInt_FMT " unneeded", m, A->cmap->n, A->rmap->bs, fshift * bs2);
22819566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Matrix size: %" PetscInt_FMT " X %" PetscInt_FMT ", block size %" PetscInt_FMT "; storage space: %" PetscInt_FMT " unneeded, %" PetscInt_FMT " used\n", m, A->cmap->n, A->rmap->bs, fshift * bs2, a->nz * bs2));
22829566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Number of mallocs during MatSetValues is %" PetscInt_FMT "\n", a->reallocs));
22839566063dSJacob Faibussowitsch   PetscCall(PetscInfo(A, "Most nonzeros blocks in any row is %" PetscInt_FMT "\n", rmax));
228426fbe8dcSKarl Rupp 
22858e58a170SBarry Smith   A->info.mallocs += a->reallocs;
2286e2f3b5e9SSatish Balay   a->reallocs         = 0;
22870e6d2581SBarry Smith   A->info.nz_unneeded = (PetscReal)fshift * bs2;
2288647a6520SHong Zhang   a->rmax             = rmax;
2289cf4441caSHong Zhang 
229048a46eb9SPierre Jolivet   if (!A->structure_only) PetscCall(MatCheckCompressedRow(A, a->nonzerorowcnt, &a->compressedrow, a->i, mbs, ratio));
22913a40ed3dSBarry Smith   PetscFunctionReturn(0);
2292584200bdSSatish Balay }
2293584200bdSSatish Balay 
2294bea157c4SSatish Balay /*
2295bea157c4SSatish Balay    This function returns an array of flags which indicate the locations of contiguous
2296bea157c4SSatish Balay    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2297a5b23f4aSJose E. Roman    then the resulting sizes = [3,1,1,3,1] corresponding to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2298bea157c4SSatish Balay    Assume: sizes should be long enough to hold all the values.
2299bea157c4SSatish Balay */
2300d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[], PetscInt n, PetscInt bs, PetscInt sizes[], PetscInt *bs_max)
2301d71ae5a4SJacob Faibussowitsch {
2302c1ac3661SBarry Smith   PetscInt  i, j, k, row;
2303ace3abfcSBarry Smith   PetscBool flg;
23043a40ed3dSBarry Smith 
2305433994e6SBarry Smith   PetscFunctionBegin;
2306bea157c4SSatish Balay   for (i = 0, j = 0; i < n; j++) {
2307bea157c4SSatish Balay     row = idx[i];
2308a5b23f4aSJose E. Roman     if (row % bs != 0) { /* Not the beginning of a block */
2309bea157c4SSatish Balay       sizes[j] = 1;
2310bea157c4SSatish Balay       i++;
2311e4fda26cSSatish Balay     } else if (i + bs > n) { /* complete block doesn't exist (at idx end) */
2312bea157c4SSatish Balay       sizes[j] = 1;          /* Also makes sure at least 'bs' values exist for next else */
2313bea157c4SSatish Balay       i++;
23146aad120cSJose E. Roman     } else { /* Beginning of the block, so check if the complete block exists */
2315bea157c4SSatish Balay       flg = PETSC_TRUE;
2316bea157c4SSatish Balay       for (k = 1; k < bs; k++) {
2317bea157c4SSatish Balay         if (row + k != idx[i + k]) { /* break in the block */
2318bea157c4SSatish Balay           flg = PETSC_FALSE;
2319bea157c4SSatish Balay           break;
2320d9b7c43dSSatish Balay         }
2321bea157c4SSatish Balay       }
2322abc0a331SBarry Smith       if (flg) { /* No break in the bs */
2323bea157c4SSatish Balay         sizes[j] = bs;
2324bea157c4SSatish Balay         i += bs;
2325bea157c4SSatish Balay       } else {
2326bea157c4SSatish Balay         sizes[j] = 1;
2327bea157c4SSatish Balay         i++;
2328bea157c4SSatish Balay       }
2329bea157c4SSatish Balay     }
2330bea157c4SSatish Balay   }
2331bea157c4SSatish Balay   *bs_max = j;
23323a40ed3dSBarry Smith   PetscFunctionReturn(0);
2333d9b7c43dSSatish Balay }
2334d9b7c43dSSatish Balay 
2335d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRows_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2336d71ae5a4SJacob Faibussowitsch {
2337d9b7c43dSSatish Balay   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
2338f4df32b1SMatthew Knepley   PetscInt           i, j, k, count, *rows;
2339d0f46423SBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, *sizes, row, bs_max;
234087828ca2SBarry Smith   PetscScalar        zero = 0.0;
23413f1db9ecSBarry Smith   MatScalar         *aa;
234297b48c8fSBarry Smith   const PetscScalar *xx;
234397b48c8fSBarry Smith   PetscScalar       *bb;
2344d9b7c43dSSatish Balay 
23453a40ed3dSBarry Smith   PetscFunctionBegin;
234697b48c8fSBarry Smith   /* fix right hand side if needed */
234797b48c8fSBarry Smith   if (x && b) {
23489566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
23499566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
2350ad540459SPierre Jolivet     for (i = 0; i < is_n; i++) bb[is_idx[i]] = diag * xx[is_idx[i]];
23519566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
23529566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
235397b48c8fSBarry Smith   }
235497b48c8fSBarry Smith 
2355d9b7c43dSSatish Balay   /* Make a copy of the IS and  sort it */
2356bea157c4SSatish Balay   /* allocate memory for rows,sizes */
23579566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(is_n, &rows, 2 * is_n, &sizes));
2358bea157c4SSatish Balay 
2359563b5814SBarry Smith   /* copy IS values to rows, and sort them */
236026fbe8dcSKarl Rupp   for (i = 0; i < is_n; i++) rows[i] = is_idx[i];
23619566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(is_n, rows));
236297b48c8fSBarry Smith 
2363a9817697SBarry Smith   if (baij->keepnonzeropattern) {
236426fbe8dcSKarl Rupp     for (i = 0; i < is_n; i++) sizes[i] = 1;
2365dffd3267SBarry Smith     bs_max = is_n;
2366dffd3267SBarry Smith   } else {
23679566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ_Check_Blocks(rows, is_n, bs, sizes, &bs_max));
2368e56f5c9eSBarry Smith     A->nonzerostate++;
2369dffd3267SBarry Smith   }
2370bea157c4SSatish Balay 
2371bea157c4SSatish Balay   for (i = 0, j = 0; i < bs_max; j += sizes[i], i++) {
2372bea157c4SSatish Balay     row = rows[j];
23735f80ce2aSJacob Faibussowitsch     PetscCheck(row >= 0 && row <= A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", row);
2374bea157c4SSatish Balay     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
2375b31fbe3bSSatish Balay     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
2376a9817697SBarry Smith     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2377d4a378daSJed Brown       if (diag != (PetscScalar)0.0) {
2378bea157c4SSatish Balay         if (baij->ilen[row / bs] > 0) {
2379bea157c4SSatish Balay           baij->ilen[row / bs]       = 1;
2380bea157c4SSatish Balay           baij->j[baij->i[row / bs]] = row / bs;
238126fbe8dcSKarl Rupp 
23829566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(aa, count * bs));
2383a07cd24cSSatish Balay         }
2384563b5814SBarry Smith         /* Now insert all the diagonal values for this bs */
238548a46eb9SPierre Jolivet         for (k = 0; k < bs; k++) PetscCall((*A->ops->setvalues)(A, 1, rows + j + k, 1, rows + j + k, &diag, INSERT_VALUES));
2386f4df32b1SMatthew Knepley       } else { /* (diag == 0.0) */
2387bea157c4SSatish Balay         baij->ilen[row / bs] = 0;
2388f4df32b1SMatthew Knepley       }      /* end (diag == 0.0) */
2389bea157c4SSatish Balay     } else { /* (sizes[i] != bs) */
23906bdcaf15SBarry Smith       PetscAssert(sizes[i] == 1, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal Error. Value should be 1");
2391bea157c4SSatish Balay       for (k = 0; k < count; k++) {
2392d9b7c43dSSatish Balay         aa[0] = zero;
2393d9b7c43dSSatish Balay         aa += bs;
2394d9b7c43dSSatish Balay       }
239548a46eb9SPierre Jolivet       if (diag != (PetscScalar)0.0) PetscCall((*A->ops->setvalues)(A, 1, rows + j, 1, rows + j, &diag, INSERT_VALUES));
2396d9b7c43dSSatish Balay     }
2397bea157c4SSatish Balay   }
2398bea157c4SSatish Balay 
23999566063dSJacob Faibussowitsch   PetscCall(PetscFree2(rows, sizes));
24009566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
24013a40ed3dSBarry Smith   PetscFunctionReturn(0);
2402d9b7c43dSSatish Balay }
24031c351548SSatish Balay 
2404d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A, PetscInt is_n, const PetscInt is_idx[], PetscScalar diag, Vec x, Vec b)
2405d71ae5a4SJacob Faibussowitsch {
240697b48c8fSBarry Smith   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)A->data;
240797b48c8fSBarry Smith   PetscInt           i, j, k, count;
240897b48c8fSBarry Smith   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2, row, col;
240997b48c8fSBarry Smith   PetscScalar        zero = 0.0;
241097b48c8fSBarry Smith   MatScalar         *aa;
241197b48c8fSBarry Smith   const PetscScalar *xx;
241297b48c8fSBarry Smith   PetscScalar       *bb;
241356777dd2SBarry Smith   PetscBool         *zeroed, vecs = PETSC_FALSE;
241497b48c8fSBarry Smith 
241597b48c8fSBarry Smith   PetscFunctionBegin;
241697b48c8fSBarry Smith   /* fix right hand side if needed */
241797b48c8fSBarry Smith   if (x && b) {
24189566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
24199566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
242056777dd2SBarry Smith     vecs = PETSC_TRUE;
242197b48c8fSBarry Smith   }
242297b48c8fSBarry Smith 
242397b48c8fSBarry Smith   /* zero the columns */
24249566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(A->rmap->n, &zeroed));
242597b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
24265f80ce2aSJacob Faibussowitsch     PetscCheck(is_idx[i] >= 0 && is_idx[i] < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "row %" PetscInt_FMT " out of range", is_idx[i]);
242797b48c8fSBarry Smith     zeroed[is_idx[i]] = PETSC_TRUE;
242897b48c8fSBarry Smith   }
242997b48c8fSBarry Smith   for (i = 0; i < A->rmap->N; i++) {
243097b48c8fSBarry Smith     if (!zeroed[i]) {
243197b48c8fSBarry Smith       row = i / bs;
243297b48c8fSBarry Smith       for (j = baij->i[row]; j < baij->i[row + 1]; j++) {
243397b48c8fSBarry Smith         for (k = 0; k < bs; k++) {
243497b48c8fSBarry Smith           col = bs * baij->j[j] + k;
243597b48c8fSBarry Smith           if (zeroed[col]) {
243697b48c8fSBarry Smith             aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
243756777dd2SBarry Smith             if (vecs) bb[i] -= aa[0] * xx[col];
243897b48c8fSBarry Smith             aa[0] = 0.0;
243997b48c8fSBarry Smith           }
244097b48c8fSBarry Smith         }
244197b48c8fSBarry Smith       }
244256777dd2SBarry Smith     } else if (vecs) bb[i] = diag * xx[i];
244397b48c8fSBarry Smith   }
24449566063dSJacob Faibussowitsch   PetscCall(PetscFree(zeroed));
244556777dd2SBarry Smith   if (vecs) {
24469566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
24479566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
244856777dd2SBarry Smith   }
244997b48c8fSBarry Smith 
245097b48c8fSBarry Smith   /* zero the rows */
245197b48c8fSBarry Smith   for (i = 0; i < is_n; i++) {
245297b48c8fSBarry Smith     row   = is_idx[i];
245397b48c8fSBarry Smith     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
245497b48c8fSBarry Smith     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
245597b48c8fSBarry Smith     for (k = 0; k < count; k++) {
245697b48c8fSBarry Smith       aa[0] = zero;
245797b48c8fSBarry Smith       aa += bs;
245897b48c8fSBarry Smith     }
2459dbbe0bcdSBarry Smith     if (diag != (PetscScalar)0.0) PetscUseTypeMethod(A, setvalues, 1, &row, 1, &row, &diag, INSERT_VALUES);
246097b48c8fSBarry Smith   }
24619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd_SeqBAIJ(A, MAT_FINAL_ASSEMBLY));
246297b48c8fSBarry Smith   PetscFunctionReturn(0);
246397b48c8fSBarry Smith }
246497b48c8fSBarry Smith 
2465d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_SeqBAIJ(Mat A, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode is)
2466d71ae5a4SJacob Faibussowitsch {
24672d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2468e2ee6c50SBarry Smith   PetscInt    *rp, k, low, high, t, ii, row, nrow, i, col, l, rmax, N, lastcol = -1;
2469c1ac3661SBarry Smith   PetscInt    *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2470d0f46423SBarry Smith   PetscInt    *aj = a->j, nonew = a->nonew, bs = A->rmap->bs, brow, bcol;
2471c1ac3661SBarry Smith   PetscInt     ridx, cidx, bs2                 = a->bs2;
2472ace3abfcSBarry Smith   PetscBool    roworiented = a->roworiented;
2473d8cdefa3SHong Zhang   MatScalar   *ap = NULL, value = 0.0, *aa = a->a, *bap;
24742d61bbb3SSatish Balay 
24752d61bbb3SSatish Balay   PetscFunctionBegin;
24762d61bbb3SSatish Balay   for (k = 0; k < m; k++) { /* loop over added rows */
2477085a36d4SBarry Smith     row  = im[k];
2478085a36d4SBarry Smith     brow = row / bs;
24795ef9f2a5SBarry Smith     if (row < 0) continue;
24806bdcaf15SBarry Smith     PetscCheck(row < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, row, A->rmap->N - 1);
24812d61bbb3SSatish Balay     rp = aj + ai[brow];
2482672ba085SHong Zhang     if (!A->structure_only) ap = aa + bs2 * ai[brow];
24832d61bbb3SSatish Balay     rmax = imax[brow];
24842d61bbb3SSatish Balay     nrow = ailen[brow];
24852d61bbb3SSatish Balay     low  = 0;
2486c71e6ed7SBarry Smith     high = nrow;
24872d61bbb3SSatish Balay     for (l = 0; l < n; l++) { /* loop over added columns */
24885ef9f2a5SBarry Smith       if (in[l] < 0) continue;
24896bdcaf15SBarry Smith       PetscCheck(in[l] < A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[l], A->cmap->n - 1);
24909371c9d4SSatish Balay       col  = in[l];
24919371c9d4SSatish Balay       bcol = col / bs;
24929371c9d4SSatish Balay       ridx = row % bs;
24939371c9d4SSatish Balay       cidx = col % bs;
2494672ba085SHong Zhang       if (!A->structure_only) {
24952d61bbb3SSatish Balay         if (roworiented) {
24965ef9f2a5SBarry Smith           value = v[l + k * n];
24972d61bbb3SSatish Balay         } else {
24982d61bbb3SSatish Balay           value = v[k + l * m];
24992d61bbb3SSatish Balay         }
2500672ba085SHong Zhang       }
25019371c9d4SSatish Balay       if (col <= lastcol) low = 0;
25029371c9d4SSatish Balay       else high = nrow;
2503e2ee6c50SBarry Smith       lastcol = col;
25042d61bbb3SSatish Balay       while (high - low > 7) {
25052d61bbb3SSatish Balay         t = (low + high) / 2;
25062d61bbb3SSatish Balay         if (rp[t] > bcol) high = t;
25072d61bbb3SSatish Balay         else low = t;
25082d61bbb3SSatish Balay       }
25092d61bbb3SSatish Balay       for (i = low; i < high; i++) {
25102d61bbb3SSatish Balay         if (rp[i] > bcol) break;
25112d61bbb3SSatish Balay         if (rp[i] == bcol) {
25122d61bbb3SSatish Balay           bap = ap + bs2 * i + bs * cidx + ridx;
2513672ba085SHong Zhang           if (!A->structure_only) {
25142d61bbb3SSatish Balay             if (is == ADD_VALUES) *bap += value;
25152d61bbb3SSatish Balay             else *bap = value;
2516672ba085SHong Zhang           }
25172d61bbb3SSatish Balay           goto noinsert1;
25182d61bbb3SSatish Balay         }
25192d61bbb3SSatish Balay       }
25202d61bbb3SSatish Balay       if (nonew == 1) goto noinsert1;
25215f80ce2aSJacob Faibussowitsch       PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", row, col);
2522672ba085SHong Zhang       if (A->structure_only) {
2523672ba085SHong Zhang         MatSeqXAIJReallocateAIJ_structure_only(A, a->mbs, bs2, nrow, brow, bcol, rmax, ai, aj, rp, imax, nonew, MatScalar);
2524672ba085SHong Zhang       } else {
2525fef13f97SBarry Smith         MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
2526672ba085SHong Zhang       }
25279371c9d4SSatish Balay       N = nrow++ - 1;
25289371c9d4SSatish Balay       high++;
25292d61bbb3SSatish Balay       /* shift up all the later entries in this row */
25309566063dSJacob Faibussowitsch       PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
25312d61bbb3SSatish Balay       rp[i] = bcol;
2532580bdb30SBarry Smith       if (!A->structure_only) {
25339566063dSJacob Faibussowitsch         PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
25349566063dSJacob Faibussowitsch         PetscCall(PetscArrayzero(ap + bs2 * i, bs2));
2535580bdb30SBarry Smith         ap[bs2 * i + bs * cidx + ridx] = value;
2536580bdb30SBarry Smith       }
2537085a36d4SBarry Smith       a->nz++;
2538e56f5c9eSBarry Smith       A->nonzerostate++;
25392d61bbb3SSatish Balay     noinsert1:;
25402d61bbb3SSatish Balay       low = i;
25412d61bbb3SSatish Balay     }
25422d61bbb3SSatish Balay     ailen[brow] = nrow;
25432d61bbb3SSatish Balay   }
25442d61bbb3SSatish Balay   PetscFunctionReturn(0);
25452d61bbb3SSatish Balay }
25462d61bbb3SSatish Balay 
2547d71ae5a4SJacob Faibussowitsch PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA, IS row, IS col, const MatFactorInfo *info)
2548d71ae5a4SJacob Faibussowitsch {
25492d61bbb3SSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inA->data;
25502d61bbb3SSatish Balay   Mat          outA;
2551ace3abfcSBarry Smith   PetscBool    row_identity, col_identity;
25522d61bbb3SSatish Balay 
25532d61bbb3SSatish Balay   PetscFunctionBegin;
25545f80ce2aSJacob Faibussowitsch   PetscCheck(info->levels == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only levels = 0 supported for in-place ILU");
25559566063dSJacob Faibussowitsch   PetscCall(ISIdentity(row, &row_identity));
25569566063dSJacob Faibussowitsch   PetscCall(ISIdentity(col, &col_identity));
25575f80ce2aSJacob Faibussowitsch   PetscCheck(row_identity && col_identity, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Row and column permutations must be identity for in-place ILU");
25582d61bbb3SSatish Balay 
25592d61bbb3SSatish Balay   outA            = inA;
2560d5f3da31SBarry Smith   inA->factortype = MAT_FACTOR_LU;
25619566063dSJacob Faibussowitsch   PetscCall(PetscFree(inA->solvertype));
25629566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERPETSC, &inA->solvertype));
25632d61bbb3SSatish Balay 
25649566063dSJacob Faibussowitsch   PetscCall(MatMarkDiagonal_SeqBAIJ(inA));
2565cf242676SKris Buschelman 
25669566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)row));
25679566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->row));
2568c3122656SLisandro Dalcin   a->row = row;
25699566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)col));
25709566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->col));
2571c3122656SLisandro Dalcin   a->col = col;
2572c38d4ed2SBarry Smith 
2573c38d4ed2SBarry Smith   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
25749566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&a->icol));
25759566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(col, PETSC_DECIDE, &a->icol));
2576c38d4ed2SBarry Smith 
25779566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(inA, (PetscBool)(row_identity && col_identity)));
25784dfa11a4SJacob Faibussowitsch   if (!a->solve_work) { PetscCall(PetscMalloc1(inA->rmap->N + inA->rmap->bs, &a->solve_work)); }
25799566063dSJacob Faibussowitsch   PetscCall(MatLUFactorNumeric(outA, inA, info));
25802d61bbb3SSatish Balay   PetscFunctionReturn(0);
25812d61bbb3SSatish Balay }
2582d9b7c43dSSatish Balay 
2583d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat, PetscInt *indices)
2584d71ae5a4SJacob Faibussowitsch {
258527a8da17SBarry Smith   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2586bdb1c0e1SJed Brown   PetscInt     i, nz, mbs;
258727a8da17SBarry Smith 
258827a8da17SBarry Smith   PetscFunctionBegin;
2589b32cb4a7SJed Brown   nz  = baij->maxnz;
2590bdb1c0e1SJed Brown   mbs = baij->mbs;
2591ad540459SPierre Jolivet   for (i = 0; i < nz; i++) baij->j[i] = indices[i];
259227a8da17SBarry Smith   baij->nz = nz;
2593ad540459SPierre Jolivet   for (i = 0; i < mbs; i++) baij->ilen[i] = baij->imax[i];
259427a8da17SBarry Smith   PetscFunctionReturn(0);
259527a8da17SBarry Smith }
259627a8da17SBarry Smith 
259727a8da17SBarry Smith /*@
259811a5261eSBarry Smith     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows in the matrix.
259927a8da17SBarry Smith 
260027a8da17SBarry Smith   Input Parameters:
260111a5261eSBarry Smith +  mat - the `MATSEQBAIJ` matrix
260227a8da17SBarry Smith -  indices - the column indices
260327a8da17SBarry Smith 
260415091d37SBarry Smith   Level: advanced
260515091d37SBarry Smith 
260627a8da17SBarry Smith   Notes:
260727a8da17SBarry Smith     This can be called if you have precomputed the nonzero structure of the
260827a8da17SBarry Smith   matrix and want to provide it to the matrix object to improve the performance
260911a5261eSBarry Smith   of the `MatSetValues()` operation.
261027a8da17SBarry Smith 
261127a8da17SBarry Smith     You MUST have set the correct numbers of nonzeros per row in the call to
261211a5261eSBarry Smith   `MatCreateSeqBAIJ()`, and the columns indices MUST be sorted.
261327a8da17SBarry Smith 
261411a5261eSBarry Smith     MUST be called before any calls to `MatSetValues()`
261527a8da17SBarry Smith 
261611a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSetValues()`
261727a8da17SBarry Smith @*/
2618d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat, PetscInt *indices)
2619d71ae5a4SJacob Faibussowitsch {
262027a8da17SBarry Smith   PetscFunctionBegin;
26210700a824SBarry Smith   PetscValidHeaderSpecific(mat, MAT_CLASSID, 1);
2622dadcf809SJacob Faibussowitsch   PetscValidIntPointer(indices, 2);
2623cac4c232SBarry Smith   PetscUseMethod(mat, "MatSeqBAIJSetColumnIndices_C", (Mat, PetscInt *), (mat, indices));
262427a8da17SBarry Smith   PetscFunctionReturn(0);
262527a8da17SBarry Smith }
262627a8da17SBarry Smith 
2627d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A, Vec v, PetscInt idx[])
2628d71ae5a4SJacob Faibussowitsch {
2629273d9f13SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2630c1ac3661SBarry Smith   PetscInt     i, j, n, row, bs, *ai, *aj, mbs;
2631273d9f13SBarry Smith   PetscReal    atmp;
263287828ca2SBarry Smith   PetscScalar *x, zero = 0.0;
2633273d9f13SBarry Smith   MatScalar   *aa;
2634c1ac3661SBarry Smith   PetscInt     ncols, brow, krow, kcol;
2635273d9f13SBarry Smith 
2636273d9f13SBarry Smith   PetscFunctionBegin;
26375f80ce2aSJacob Faibussowitsch   /* why is this not a macro???????????????????????????????????????????????????????????????? */
26385f80ce2aSJacob Faibussowitsch   PetscCheck(!A->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
2639d0f46423SBarry Smith   bs  = A->rmap->bs;
2640273d9f13SBarry Smith   aa  = a->a;
2641273d9f13SBarry Smith   ai  = a->i;
2642273d9f13SBarry Smith   aj  = a->j;
2643273d9f13SBarry Smith   mbs = a->mbs;
2644273d9f13SBarry Smith 
26459566063dSJacob Faibussowitsch   PetscCall(VecSet(v, zero));
26469566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &x));
26479566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(v, &n));
26485f80ce2aSJacob Faibussowitsch   PetscCheck(n == A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Nonconforming matrix and vector");
2649273d9f13SBarry Smith   for (i = 0; i < mbs; i++) {
26509371c9d4SSatish Balay     ncols = ai[1] - ai[0];
26519371c9d4SSatish Balay     ai++;
2652273d9f13SBarry Smith     brow = bs * i;
2653273d9f13SBarry Smith     for (j = 0; j < ncols; j++) {
2654273d9f13SBarry Smith       for (kcol = 0; kcol < bs; kcol++) {
2655273d9f13SBarry Smith         for (krow = 0; krow < bs; krow++) {
26569371c9d4SSatish Balay           atmp = PetscAbsScalar(*aa);
26579371c9d4SSatish Balay           aa++;
2658273d9f13SBarry Smith           row = brow + krow; /* row index */
26599371c9d4SSatish Balay           if (PetscAbsScalar(x[row]) < atmp) {
26609371c9d4SSatish Balay             x[row] = atmp;
26619371c9d4SSatish Balay             if (idx) idx[row] = bs * (*aj) + kcol;
26629371c9d4SSatish Balay           }
2663273d9f13SBarry Smith         }
2664273d9f13SBarry Smith       }
2665273d9f13SBarry Smith       aj++;
2666273d9f13SBarry Smith     }
2667273d9f13SBarry Smith   }
26689566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &x));
2669273d9f13SBarry Smith   PetscFunctionReturn(0);
2670273d9f13SBarry Smith }
2671273d9f13SBarry Smith 
2672d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_SeqBAIJ(Mat A, Mat B, MatStructure str)
2673d71ae5a4SJacob Faibussowitsch {
26743c896bc6SHong Zhang   PetscFunctionBegin;
26753c896bc6SHong Zhang   /* If the two matrices have the same copy implementation, use fast copy. */
26763c896bc6SHong Zhang   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
26773c896bc6SHong Zhang     Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)A->data;
26783c896bc6SHong Zhang     Mat_SeqBAIJ *b    = (Mat_SeqBAIJ *)B->data;
2679d88c0aacSHong Zhang     PetscInt     ambs = a->mbs, bmbs = b->mbs, abs = A->rmap->bs, bbs = B->rmap->bs, bs2 = abs * abs;
26803c896bc6SHong Zhang 
26815f80ce2aSJacob Faibussowitsch     PetscCheck(a->i[ambs] == b->i[bmbs], PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Number of nonzero blocks in matrices A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", a->i[ambs], b->i[bmbs]);
26825f80ce2aSJacob Faibussowitsch     PetscCheck(abs == bbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Block size A %" PetscInt_FMT " and B %" PetscInt_FMT " are different", abs, bbs);
26839566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(b->a, a->a, bs2 * a->i[ambs]));
26849566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)B));
26853c896bc6SHong Zhang   } else {
26869566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
26873c896bc6SHong Zhang   }
26883c896bc6SHong Zhang   PetscFunctionReturn(0);
26893c896bc6SHong Zhang }
26903c896bc6SHong Zhang 
2691d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2692d71ae5a4SJacob Faibussowitsch {
2693273d9f13SBarry Smith   PetscFunctionBegin;
26949566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL));
2695273d9f13SBarry Smith   PetscFunctionReturn(0);
2696273d9f13SBarry Smith }
2697273d9f13SBarry Smith 
2698d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A, PetscScalar *array[])
2699d71ae5a4SJacob Faibussowitsch {
2700f2a5309cSSatish Balay   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27016e111a19SKarl Rupp 
2702f2a5309cSSatish Balay   PetscFunctionBegin;
2703f2a5309cSSatish Balay   *array = a->a;
2704f2a5309cSSatish Balay   PetscFunctionReturn(0);
2705f2a5309cSSatish Balay }
2706f2a5309cSSatish Balay 
2707d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A, PetscScalar *array[])
2708d71ae5a4SJacob Faibussowitsch {
2709f2a5309cSSatish Balay   PetscFunctionBegin;
2710cda14afcSprj-   *array = NULL;
2711f2a5309cSSatish Balay   PetscFunctionReturn(0);
2712f2a5309cSSatish Balay }
2713f2a5309cSSatish Balay 
2714d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y, Mat X, PetscInt *nnz)
2715d71ae5a4SJacob Faibussowitsch {
2716b264fe52SHong Zhang   PetscInt     bs = Y->rmap->bs, mbs = Y->rmap->N / bs;
271752768537SHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
271852768537SHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
271952768537SHong Zhang 
272052768537SHong Zhang   PetscFunctionBegin;
272152768537SHong Zhang   /* Set the number of nonzeros in the new matrix */
27229566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_SeqX_private(mbs, x->i, x->j, y->i, y->j, nnz));
272352768537SHong Zhang   PetscFunctionReturn(0);
272452768537SHong Zhang }
272552768537SHong Zhang 
2726d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_SeqBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
2727d71ae5a4SJacob Faibussowitsch {
272842ee4b1aSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data, *y = (Mat_SeqBAIJ *)Y->data;
272931ce2d13SHong Zhang   PetscInt     bs = Y->rmap->bs, bs2 = bs * bs;
2730e838b9e7SJed Brown   PetscBLASInt one = 1;
273142ee4b1aSHong Zhang 
273242ee4b1aSHong Zhang   PetscFunctionBegin;
2733134adf20SPierre Jolivet   if (str == UNKNOWN_NONZERO_PATTERN || (PetscDefined(USE_DEBUG) && str == SAME_NONZERO_PATTERN)) {
2734134adf20SPierre Jolivet     PetscBool e = x->nz == y->nz && x->mbs == y->mbs && bs == X->rmap->bs ? PETSC_TRUE : PETSC_FALSE;
2735134adf20SPierre Jolivet     if (e) {
27369566063dSJacob Faibussowitsch       PetscCall(PetscArraycmp(x->i, y->i, x->mbs + 1, &e));
2737134adf20SPierre Jolivet       if (e) {
27389566063dSJacob Faibussowitsch         PetscCall(PetscArraycmp(x->j, y->j, x->i[x->mbs], &e));
2739134adf20SPierre Jolivet         if (e) str = SAME_NONZERO_PATTERN;
2740134adf20SPierre Jolivet       }
2741134adf20SPierre Jolivet     }
274254c59aa7SJacob Faibussowitsch     if (!e) PetscCheck(str != SAME_NONZERO_PATTERN, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "MatStructure is not SAME_NONZERO_PATTERN");
2743134adf20SPierre Jolivet   }
274442ee4b1aSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
2745f4df32b1SMatthew Knepley     PetscScalar  alpha = a;
2746c5df96a5SBarry Smith     PetscBLASInt bnz;
27479566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
2748792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
27499566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
2750ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
27519566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
275242ee4b1aSHong Zhang   } else {
275352768537SHong Zhang     Mat       B;
275452768537SHong Zhang     PetscInt *nnz;
275554c59aa7SJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
27569566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(Y->rmap->N, &nnz));
27579566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
27589566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
27599566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
27609566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
27619566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, (MatType)((PetscObject)Y)->type_name));
27629566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(Y, X, nnz));
27639566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
27649566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
27659566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
27669566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
276742ee4b1aSHong Zhang   }
276842ee4b1aSHong Zhang   PetscFunctionReturn(0);
276942ee4b1aSHong Zhang }
277042ee4b1aSHong Zhang 
2771d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2772d71ae5a4SJacob Faibussowitsch {
27732726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX)
27742726fb6dSPierre Jolivet   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
27752726fb6dSPierre Jolivet   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
27762726fb6dSPierre Jolivet   MatScalar   *aa = a->a;
27772726fb6dSPierre Jolivet 
27782726fb6dSPierre Jolivet   PetscFunctionBegin;
27792726fb6dSPierre Jolivet   for (i = 0; i < nz; i++) aa[i] = PetscConj(aa[i]);
27802726fb6dSPierre Jolivet #else
27812726fb6dSPierre Jolivet   PetscFunctionBegin;
27822726fb6dSPierre Jolivet #endif
27832726fb6dSPierre Jolivet   PetscFunctionReturn(0);
27842726fb6dSPierre Jolivet }
27852726fb6dSPierre Jolivet 
2786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2787d71ae5a4SJacob Faibussowitsch {
278899cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
278999cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2790dd6ea824SBarry Smith   MatScalar   *aa = a->a;
279199cafbc1SBarry Smith 
279299cafbc1SBarry Smith   PetscFunctionBegin;
279399cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscRealPart(aa[i]);
279499cafbc1SBarry Smith   PetscFunctionReturn(0);
279599cafbc1SBarry Smith }
279699cafbc1SBarry Smith 
2797d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2798d71ae5a4SJacob Faibussowitsch {
279999cafbc1SBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
280099cafbc1SBarry Smith   PetscInt     i, nz = a->bs2 * a->i[a->mbs];
2801dd6ea824SBarry Smith   MatScalar   *aa = a->a;
280299cafbc1SBarry Smith 
280399cafbc1SBarry Smith   PetscFunctionBegin;
280499cafbc1SBarry Smith   for (i = 0; i < nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
280599cafbc1SBarry Smith   PetscFunctionReturn(0);
280699cafbc1SBarry Smith }
280799cafbc1SBarry Smith 
28083acb8795SBarry Smith /*
    This code is almost identical to MatGetColumnIJ_SeqAIJ(); the two should share common code.
28103acb8795SBarry Smith */
2811d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2812d71ae5a4SJacob Faibussowitsch {
28133acb8795SBarry Smith   Mat_SeqBAIJ *a  = (Mat_SeqBAIJ *)A->data;
28143acb8795SBarry Smith   PetscInt     bs = A->rmap->bs, i, *collengths, *cia, *cja, n = A->cmap->n / bs, m = A->rmap->n / bs;
28153acb8795SBarry Smith   PetscInt     nz = a->i[m], row, *jj, mr, col;
28163acb8795SBarry Smith 
28173acb8795SBarry Smith   PetscFunctionBegin;
28183acb8795SBarry Smith   *nn = n;
28193acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
28205f80ce2aSJacob Faibussowitsch   PetscCheck(!symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for BAIJ matrices");
28219566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28243acb8795SBarry Smith   jj = a->j;
2825ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
28263acb8795SBarry Smith   cia[0] = oshift;
2827ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28289566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
28293acb8795SBarry Smith   jj = a->j;
28303acb8795SBarry Smith   for (row = 0; row < m; row++) {
28313acb8795SBarry Smith     mr = a->i[row + 1] - a->i[row];
28323acb8795SBarry Smith     for (i = 0; i < mr; i++) {
28333acb8795SBarry Smith       col = *jj++;
283426fbe8dcSKarl Rupp 
28353acb8795SBarry Smith       cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
28363acb8795SBarry Smith     }
28373acb8795SBarry Smith   }
28389566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
28399371c9d4SSatish Balay   *ia = cia;
28409371c9d4SSatish Balay   *ja = cja;
28413acb8795SBarry Smith   PetscFunctionReturn(0);
28423acb8795SBarry Smith }
28433acb8795SBarry Smith 
2844d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
2845d71ae5a4SJacob Faibussowitsch {
28463acb8795SBarry Smith   PetscFunctionBegin;
28473acb8795SBarry Smith   if (!ia) PetscFunctionReturn(0);
28489566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ia));
28499566063dSJacob Faibussowitsch   PetscCall(PetscFree(*ja));
28503acb8795SBarry Smith   PetscFunctionReturn(0);
28513acb8795SBarry Smith }
28523acb8795SBarry Smith 
2853525d23c0SHong Zhang /*
2854525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2855525d23c0SHong Zhang  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2856040ebd07SHong Zhang  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2857525d23c0SHong Zhang  */
2858d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *nn, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2859d71ae5a4SJacob Faibussowitsch {
2860525d23c0SHong Zhang   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
2861c0349474SHong Zhang   PetscInt     i, *collengths, *cia, *cja, n = a->nbs, m = a->mbs;
2862525d23c0SHong Zhang   PetscInt     nz = a->i[m], row, *jj, mr, col;
2863525d23c0SHong Zhang   PetscInt    *cspidx;
2864f6d58c54SBarry Smith 
2865f6d58c54SBarry Smith   PetscFunctionBegin;
2866525d23c0SHong Zhang   *nn = n;
2867525d23c0SHong Zhang   if (!ia) PetscFunctionReturn(0);
2868f6d58c54SBarry Smith 
28699566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(n, &collengths));
28709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &cia));
28719566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cja));
28729566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &cspidx));
2873525d23c0SHong Zhang   jj = a->j;
2874ad540459SPierre Jolivet   for (i = 0; i < nz; i++) collengths[jj[i]]++;
2875525d23c0SHong Zhang   cia[0] = oshift;
2876ad540459SPierre Jolivet   for (i = 0; i < n; i++) cia[i + 1] = cia[i] + collengths[i];
28779566063dSJacob Faibussowitsch   PetscCall(PetscArrayzero(collengths, n));
2878525d23c0SHong Zhang   jj = a->j;
2879525d23c0SHong Zhang   for (row = 0; row < m; row++) {
2880525d23c0SHong Zhang     mr = a->i[row + 1] - a->i[row];
2881525d23c0SHong Zhang     for (i = 0; i < mr; i++) {
2882525d23c0SHong Zhang       col                                         = *jj++;
2883525d23c0SHong Zhang       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2884525d23c0SHong Zhang       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2885525d23c0SHong Zhang     }
2886525d23c0SHong Zhang   }
28879566063dSJacob Faibussowitsch   PetscCall(PetscFree(collengths));
2888071fcb05SBarry Smith   *ia    = cia;
2889071fcb05SBarry Smith   *ja    = cja;
2890525d23c0SHong Zhang   *spidx = cspidx;
2891525d23c0SHong Zhang   PetscFunctionReturn(0);
2892f6d58c54SBarry Smith }
2893f6d58c54SBarry Smith 
2894d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *n, const PetscInt *ia[], const PetscInt *ja[], PetscInt *spidx[], PetscBool *done)
2895d71ae5a4SJacob Faibussowitsch {
2896525d23c0SHong Zhang   PetscFunctionBegin;
28979566063dSJacob Faibussowitsch   PetscCall(MatRestoreColumnIJ_SeqBAIJ(A, oshift, symmetric, inodecompressed, n, ia, ja, done));
28989566063dSJacob Faibussowitsch   PetscCall(PetscFree(*spidx));
2899f6d58c54SBarry Smith   PetscFunctionReturn(0);
2900f6d58c54SBarry Smith }
290199cafbc1SBarry Smith 
2902d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_SeqBAIJ(Mat Y, PetscScalar a)
2903d71ae5a4SJacob Faibussowitsch {
29047d68702bSBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)Y->data;
29057d68702bSBarry Smith 
29067d68702bSBarry Smith   PetscFunctionBegin;
290748a46eb9SPierre Jolivet   if (!Y->preallocated || !aij->nz) PetscCall(MatSeqBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL));
29089566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
29097d68702bSBarry Smith   PetscFunctionReturn(0);
29107d68702bSBarry Smith }
29117d68702bSBarry Smith 
29122593348eSBarry Smith /* -------------------------------------------------------------------*/
2913*dec0b466SHong Zhang static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2914cc2dc46cSBarry Smith                                        MatGetRow_SeqBAIJ,
2915cc2dc46cSBarry Smith                                        MatRestoreRow_SeqBAIJ,
2916cc2dc46cSBarry Smith                                        MatMult_SeqBAIJ_N,
291797304618SKris Buschelman                                        /* 4*/ MatMultAdd_SeqBAIJ_N,
29187c922b88SBarry Smith                                        MatMultTranspose_SeqBAIJ,
29197c922b88SBarry Smith                                        MatMultTransposeAdd_SeqBAIJ,
2920f4259b30SLisandro Dalcin                                        NULL,
2921f4259b30SLisandro Dalcin                                        NULL,
2922f4259b30SLisandro Dalcin                                        NULL,
2923f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
2924cc2dc46cSBarry Smith                                        MatLUFactor_SeqBAIJ,
2925f4259b30SLisandro Dalcin                                        NULL,
2926f4259b30SLisandro Dalcin                                        NULL,
2927f2501298SSatish Balay                                        MatTranspose_SeqBAIJ,
292897304618SKris Buschelman                                        /* 15*/ MatGetInfo_SeqBAIJ,
2929cc2dc46cSBarry Smith                                        MatEqual_SeqBAIJ,
2930cc2dc46cSBarry Smith                                        MatGetDiagonal_SeqBAIJ,
2931cc2dc46cSBarry Smith                                        MatDiagonalScale_SeqBAIJ,
2932cc2dc46cSBarry Smith                                        MatNorm_SeqBAIJ,
2933f4259b30SLisandro Dalcin                                        /* 20*/ NULL,
2934cc2dc46cSBarry Smith                                        MatAssemblyEnd_SeqBAIJ,
2935cc2dc46cSBarry Smith                                        MatSetOption_SeqBAIJ,
2936cc2dc46cSBarry Smith                                        MatZeroEntries_SeqBAIJ,
2937d519adbfSMatthew Knepley                                        /* 24*/ MatZeroRows_SeqBAIJ,
2938f4259b30SLisandro Dalcin                                        NULL,
2939f4259b30SLisandro Dalcin                                        NULL,
2940f4259b30SLisandro Dalcin                                        NULL,
2941f4259b30SLisandro Dalcin                                        NULL,
29424994cf47SJed Brown                                        /* 29*/ MatSetUp_SeqBAIJ,
2943f4259b30SLisandro Dalcin                                        NULL,
2944f4259b30SLisandro Dalcin                                        NULL,
2945f4259b30SLisandro Dalcin                                        NULL,
2946f4259b30SLisandro Dalcin                                        NULL,
2947d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_SeqBAIJ,
2948f4259b30SLisandro Dalcin                                        NULL,
2949f4259b30SLisandro Dalcin                                        NULL,
2950cc2dc46cSBarry Smith                                        MatILUFactor_SeqBAIJ,
2951f4259b30SLisandro Dalcin                                        NULL,
2952d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_SeqBAIJ,
29537dae84e0SHong Zhang                                        MatCreateSubMatrices_SeqBAIJ,
2954cc2dc46cSBarry Smith                                        MatIncreaseOverlap_SeqBAIJ,
2955cc2dc46cSBarry Smith                                        MatGetValues_SeqBAIJ,
29563c896bc6SHong Zhang                                        MatCopy_SeqBAIJ,
2957f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
2958cc2dc46cSBarry Smith                                        MatScale_SeqBAIJ,
29597d68702bSBarry Smith                                        MatShift_SeqBAIJ,
2960f4259b30SLisandro Dalcin                                        NULL,
296197b48c8fSBarry Smith                                        MatZeroRowsColumns_SeqBAIJ,
2962f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
29633b2fbd54SBarry Smith                                        MatGetRowIJ_SeqBAIJ,
296492c4ed94SBarry Smith                                        MatRestoreRowIJ_SeqBAIJ,
29653acb8795SBarry Smith                                        MatGetColumnIJ_SeqBAIJ,
29663acb8795SBarry Smith                                        MatRestoreColumnIJ_SeqBAIJ,
296793dfae19SHong Zhang                                        /* 54*/ MatFDColoringCreate_SeqXAIJ,
2968f4259b30SLisandro Dalcin                                        NULL,
2969f4259b30SLisandro Dalcin                                        NULL,
2970090001bdSToby Isaac                                        NULL,
2971d3825aa8SBarry Smith                                        MatSetValuesBlocked_SeqBAIJ,
29727dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2973b9b97703SBarry Smith                                        MatDestroy_SeqBAIJ,
2974b9b97703SBarry Smith                                        MatView_SeqBAIJ,
2975f4259b30SLisandro Dalcin                                        NULL,
2976f4259b30SLisandro Dalcin                                        NULL,
2977f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
2978f4259b30SLisandro Dalcin                                        NULL,
2979f4259b30SLisandro Dalcin                                        NULL,
2980f4259b30SLisandro Dalcin                                        NULL,
2981f4259b30SLisandro Dalcin                                        NULL,
2982d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2983f4259b30SLisandro Dalcin                                        NULL,
2984c87e5d42SMatthew Knepley                                        MatConvert_Basic,
2985f4259b30SLisandro Dalcin                                        NULL,
2986f4259b30SLisandro Dalcin                                        NULL,
2987f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
2988f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2989f4259b30SLisandro Dalcin                                        NULL,
2990f4259b30SLisandro Dalcin                                        NULL,
2991f4259b30SLisandro Dalcin                                        NULL,
2992f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
2993f4259b30SLisandro Dalcin                                        NULL,
2994f4259b30SLisandro Dalcin                                        NULL,
2995f4259b30SLisandro Dalcin                                        NULL,
29965bba2384SShri Abhyankar                                        MatLoad_SeqBAIJ,
2997f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
2998f4259b30SLisandro Dalcin                                        NULL,
2999f4259b30SLisandro Dalcin                                        NULL,
3000f4259b30SLisandro Dalcin                                        NULL,
3001f4259b30SLisandro Dalcin                                        NULL,
3002f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
3003f4259b30SLisandro Dalcin                                        NULL,
3004f4259b30SLisandro Dalcin                                        NULL,
3005f4259b30SLisandro Dalcin                                        NULL,
3006f4259b30SLisandro Dalcin                                        NULL,
3007f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
3008f4259b30SLisandro Dalcin                                        NULL,
3009f4259b30SLisandro Dalcin                                        NULL,
3010f4259b30SLisandro Dalcin                                        NULL,
3011f4259b30SLisandro Dalcin                                        NULL,
3012f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
3013f4259b30SLisandro Dalcin                                        NULL,
3014f4259b30SLisandro Dalcin                                        NULL,
30152726fb6dSPierre Jolivet                                        MatConjugate_SeqBAIJ,
3016f4259b30SLisandro Dalcin                                        NULL,
3017f4259b30SLisandro Dalcin                                        /*104*/ NULL,
301899cafbc1SBarry Smith                                        MatRealPart_SeqBAIJ,
30192af78befSBarry Smith                                        MatImaginaryPart_SeqBAIJ,
3020f4259b30SLisandro Dalcin                                        NULL,
3021f4259b30SLisandro Dalcin                                        NULL,
3022f4259b30SLisandro Dalcin                                        /*109*/ NULL,
3023f4259b30SLisandro Dalcin                                        NULL,
3024f4259b30SLisandro Dalcin                                        NULL,
3025f4259b30SLisandro Dalcin                                        NULL,
3026547795f9SHong Zhang                                        MatMissingDiagonal_SeqBAIJ,
3027f4259b30SLisandro Dalcin                                        /*114*/ NULL,
3028f4259b30SLisandro Dalcin                                        NULL,
3029f4259b30SLisandro Dalcin                                        NULL,
3030f4259b30SLisandro Dalcin                                        NULL,
3031f4259b30SLisandro Dalcin                                        NULL,
3032f4259b30SLisandro Dalcin                                        /*119*/ NULL,
3033f4259b30SLisandro Dalcin                                        NULL,
3034547795f9SHong Zhang                                        MatMultHermitianTranspose_SeqBAIJ,
3035d6037b41SHong Zhang                                        MatMultHermitianTransposeAdd_SeqBAIJ,
3036f4259b30SLisandro Dalcin                                        NULL,
3037f4259b30SLisandro Dalcin                                        /*124*/ NULL,
3038857cbf51SRichard Tran Mills                                        MatGetColumnReductions_SeqBAIJ,
30393964eb88SJed Brown                                        MatInvertBlockDiagonal_SeqBAIJ,
3040f4259b30SLisandro Dalcin                                        NULL,
3041f4259b30SLisandro Dalcin                                        NULL,
3042f4259b30SLisandro Dalcin                                        /*129*/ NULL,
3043f4259b30SLisandro Dalcin                                        NULL,
3044f4259b30SLisandro Dalcin                                        NULL,
3045f4259b30SLisandro Dalcin                                        NULL,
3046f4259b30SLisandro Dalcin                                        NULL,
3047f4259b30SLisandro Dalcin                                        /*134*/ NULL,
3048f4259b30SLisandro Dalcin                                        NULL,
3049f4259b30SLisandro Dalcin                                        NULL,
3050f4259b30SLisandro Dalcin                                        NULL,
3051f4259b30SLisandro Dalcin                                        NULL,
305246533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
3053f4259b30SLisandro Dalcin                                        NULL,
3054f4259b30SLisandro Dalcin                                        NULL,
3055bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_SeqXAIJ,
3056f4259b30SLisandro Dalcin                                        NULL,
305786e85357SHong Zhang                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
3058d70f29a3SPierre Jolivet                                        MatDestroySubMatrices_SeqBAIJ,
3059d70f29a3SPierre Jolivet                                        NULL,
306099a7f59eSMark Adams                                        NULL,
306199a7f59eSMark Adams                                        NULL,
30627fb60732SBarry Smith                                        NULL,
30637fb60732SBarry Smith                                        /*150*/ NULL,
3064*dec0b466SHong Zhang                                        NULL};
30652593348eSBarry Smith 
3066d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
3067d71ae5a4SJacob Faibussowitsch {
30683e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30698ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30703e90b805SBarry Smith 
30713e90b805SBarry Smith   PetscFunctionBegin;
30725f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30733e90b805SBarry Smith 
30743e90b805SBarry Smith   /* allocate space for values if not already there */
30754dfa11a4SJacob Faibussowitsch   if (!aij->saved_values) { PetscCall(PetscMalloc1(nz + 1, &aij->saved_values)); }
30763e90b805SBarry Smith 
30773e90b805SBarry Smith   /* copy values over */
30789566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->saved_values, aij->a, nz));
30793e90b805SBarry Smith   PetscFunctionReturn(0);
30803e90b805SBarry Smith }
30813e90b805SBarry Smith 
3082d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
3083d71ae5a4SJacob Faibussowitsch {
30843e90b805SBarry Smith   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
30858ece6314SShri Abhyankar   PetscInt     nz  = aij->i[aij->mbs] * aij->bs2;
30863e90b805SBarry Smith 
30873e90b805SBarry Smith   PetscFunctionBegin;
30885f80ce2aSJacob Faibussowitsch   PetscCheck(aij->nonew == 1, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
30895f80ce2aSJacob Faibussowitsch   PetscCheck(aij->saved_values, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Must call MatStoreValues(A);first");
30903e90b805SBarry Smith 
30913e90b805SBarry Smith   /* copy values over */
30929566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(aij->a, aij->saved_values, nz));
30933e90b805SBarry Smith   PetscFunctionReturn(0);
30943e90b805SBarry Smith }
30953e90b805SBarry Smith 
3096cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
3097cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType, MatReuse, Mat *);
3098273d9f13SBarry Smith 
3099d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B, PetscInt bs, PetscInt nz, PetscInt *nnz)
3100d71ae5a4SJacob Faibussowitsch {
3101a23d5eceSKris Buschelman   Mat_SeqBAIJ *b;
3102535b19f3SBarry Smith   PetscInt     i, mbs, nbs, bs2;
31038afaa268SBarry Smith   PetscBool    flg = PETSC_FALSE, skipallocation = PETSC_FALSE, realalloc = PETSC_FALSE;
3104a23d5eceSKris Buschelman 
3105a23d5eceSKris Buschelman   PetscFunctionBegin;
31062576faa2SJed Brown   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3107ab93d7beSBarry Smith   if (nz == MAT_SKIP_ALLOCATION) {
3108ab93d7beSBarry Smith     skipallocation = PETSC_TRUE;
3109ab93d7beSBarry Smith     nz             = 0;
3110ab93d7beSBarry Smith   }
31118c07d4e3SBarry Smith 
31129566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
31139566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
31149566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
31159566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3116899cda47SBarry Smith 
3117899cda47SBarry Smith   B->preallocated = PETSC_TRUE;
3118899cda47SBarry Smith 
3119d0f46423SBarry Smith   mbs = B->rmap->n / bs;
3120d0f46423SBarry Smith   nbs = B->cmap->n / bs;
3121a23d5eceSKris Buschelman   bs2 = bs * bs;
3122a23d5eceSKris Buschelman 
31235f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n && nbs * bs == B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number rows %" PetscInt_FMT ", cols %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, B->cmap->n, bs);
3124a23d5eceSKris Buschelman 
3125a23d5eceSKris Buschelman   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
31265f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nz cannot be less than 0: value %" PetscInt_FMT, nz);
3127a23d5eceSKris Buschelman   if (nnz) {
3128a23d5eceSKris Buschelman     for (i = 0; i < mbs; i++) {
31295f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be less than 0: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, nnz[i]);
31305f80ce2aSJacob Faibussowitsch       PetscCheck(nnz[i] <= nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "nnz cannot be greater than block row length: local row %" PetscInt_FMT " value %" PetscInt_FMT " rowlength %" PetscInt_FMT, i, nnz[i], nbs);
3131a23d5eceSKris Buschelman     }
3132a23d5eceSKris Buschelman   }
3133a23d5eceSKris Buschelman 
3134a23d5eceSKris Buschelman   b = (Mat_SeqBAIJ *)B->data;
3135d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Optimize options for SEQBAIJ matrix 2 ", "Mat");
31369566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_no_unroll", "Do not optimize for block size (slow)", NULL, flg, &flg, NULL));
3137d0609cedSBarry Smith   PetscOptionsEnd();
31388c07d4e3SBarry Smith 
3139a23d5eceSKris Buschelman   if (!flg) {
3140a23d5eceSKris Buschelman     switch (bs) {
3141a23d5eceSKris Buschelman     case 1:
3142a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_1;
3143a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
3144a23d5eceSKris Buschelman       break;
3145a23d5eceSKris Buschelman     case 2:
3146a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_2;
3147a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
3148a23d5eceSKris Buschelman       break;
3149a23d5eceSKris Buschelman     case 3:
3150a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_3;
3151a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
3152a23d5eceSKris Buschelman       break;
3153a23d5eceSKris Buschelman     case 4:
3154a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_4;
3155a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
3156a23d5eceSKris Buschelman       break;
3157a23d5eceSKris Buschelman     case 5:
3158a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_5;
3159a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
3160a23d5eceSKris Buschelman       break;
3161a23d5eceSKris Buschelman     case 6:
3162a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_6;
3163a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
3164a23d5eceSKris Buschelman       break;
3165a23d5eceSKris Buschelman     case 7:
3166a23d5eceSKris Buschelman       B->ops->mult    = MatMult_SeqBAIJ_7;
3167a23d5eceSKris Buschelman       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
3168a23d5eceSKris Buschelman       break;
31699371c9d4SSatish Balay     case 9: {
31706679dcc1SBarry Smith       PetscInt version = 1;
31719566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31726679dcc1SBarry Smith       switch (version) {
31735f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
31746679dcc1SBarry Smith       case 1:
317596e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
317696e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
31779566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
31786679dcc1SBarry Smith         break;
31796679dcc1SBarry Smith #endif
31806679dcc1SBarry Smith       default:
318196e086a2SDaniel Kokron         B->ops->mult    = MatMult_SeqBAIJ_N;
318296e086a2SDaniel Kokron         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
31839566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
318496e086a2SDaniel Kokron         break;
31856679dcc1SBarry Smith       }
31866679dcc1SBarry Smith       break;
31876679dcc1SBarry Smith     }
3188ebada01fSBarry Smith     case 11:
3189ebada01fSBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_11;
3190ebada01fSBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
3191ebada01fSBarry Smith       break;
31929371c9d4SSatish Balay     case 12: {
31936679dcc1SBarry Smith       PetscInt version = 1;
31949566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
31956679dcc1SBarry Smith       switch (version) {
31966679dcc1SBarry Smith       case 1:
31976679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
31986679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
31999566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32008ab949d8SShri Abhyankar         break;
32016679dcc1SBarry Smith       case 2:
32026679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
32036679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
32049566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32056679dcc1SBarry Smith         break;
32066679dcc1SBarry Smith #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
32076679dcc1SBarry Smith       case 3:
32086679dcc1SBarry Smith         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
32096679dcc1SBarry Smith         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
32109566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using AVX2 for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32116679dcc1SBarry Smith         break;
32126679dcc1SBarry Smith #endif
3213a23d5eceSKris Buschelman       default:
3214a23d5eceSKris Buschelman         B->ops->mult    = MatMult_SeqBAIJ_N;
3215a23d5eceSKris Buschelman         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32169566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32176679dcc1SBarry Smith         break;
32186679dcc1SBarry Smith       }
32196679dcc1SBarry Smith       break;
32206679dcc1SBarry Smith     }
32219371c9d4SSatish Balay     case 15: {
32226679dcc1SBarry Smith       PetscInt version = 1;
32239566063dSJacob Faibussowitsch       PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)B)->prefix, "-mat_baij_mult_version", &version, NULL));
32246679dcc1SBarry Smith       switch (version) {
32256679dcc1SBarry Smith       case 1:
32266679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver1;
32279566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32286679dcc1SBarry Smith         break;
32296679dcc1SBarry Smith       case 2:
32306679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver2;
32319566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32326679dcc1SBarry Smith         break;
32336679dcc1SBarry Smith       case 3:
32346679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver3;
32359566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32366679dcc1SBarry Smith         break;
32376679dcc1SBarry Smith       case 4:
32386679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_15_ver4;
32399566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using version %" PetscInt_FMT " of MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", version, bs));
32406679dcc1SBarry Smith         break;
32416679dcc1SBarry Smith       default:
32426679dcc1SBarry Smith         B->ops->mult = MatMult_SeqBAIJ_N;
32439566063dSJacob Faibussowitsch         PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
32446679dcc1SBarry Smith         break;
32456679dcc1SBarry Smith       }
32466679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32476679dcc1SBarry Smith       break;
32486679dcc1SBarry Smith     }
32496679dcc1SBarry Smith     default:
32506679dcc1SBarry Smith       B->ops->mult    = MatMult_SeqBAIJ_N;
32516679dcc1SBarry Smith       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
32529566063dSJacob Faibussowitsch       PetscCall(PetscInfo((PetscObject)B, "Using BLAS for MatMult for BAIJ for blocksize %" PetscInt_FMT "\n", bs));
3253a23d5eceSKris Buschelman       break;
3254a23d5eceSKris Buschelman     }
3255a23d5eceSKris Buschelman   }
3256e48d15efSToby Isaac   B->ops->sor = MatSOR_SeqBAIJ;
3257a23d5eceSKris Buschelman   b->mbs      = mbs;
3258a23d5eceSKris Buschelman   b->nbs      = nbs;
3259ab93d7beSBarry Smith   if (!skipallocation) {
32602ee49352SLisandro Dalcin     if (!b->imax) {
32619566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mbs, &b->imax, mbs, &b->ilen));
326226fbe8dcSKarl Rupp 
32634fd072dbSBarry Smith       b->free_imax_ilen = PETSC_TRUE;
32642ee49352SLisandro Dalcin     }
3265ab93d7beSBarry Smith     /* b->ilen will count nonzeros in each block row so far. */
326626fbe8dcSKarl Rupp     for (i = 0; i < mbs; i++) b->ilen[i] = 0;
3267a23d5eceSKris Buschelman     if (!nnz) {
3268a23d5eceSKris Buschelman       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3269c62bd62aSJed Brown       else if (nz < 0) nz = 1;
32705d2a9ed1SStefano Zampini       nz = PetscMin(nz, nbs);
3271a23d5eceSKris Buschelman       for (i = 0; i < mbs; i++) b->imax[i] = nz;
32729566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, mbs, &nz));
3273a23d5eceSKris Buschelman     } else {
3274c73702f5SBarry Smith       PetscInt64 nz64 = 0;
32759371c9d4SSatish Balay       for (i = 0; i < mbs; i++) {
32769371c9d4SSatish Balay         b->imax[i] = nnz[i];
32779371c9d4SSatish Balay         nz64 += nnz[i];
32789371c9d4SSatish Balay       }
32799566063dSJacob Faibussowitsch       PetscCall(PetscIntCast(nz64, &nz));
3280a23d5eceSKris Buschelman     }
3281a23d5eceSKris Buschelman 
3282a23d5eceSKris Buschelman     /* allocate the matrix space */
32839566063dSJacob Faibussowitsch     PetscCall(MatSeqXAIJFreeAIJ(B, &b->a, &b->j, &b->i));
3284672ba085SHong Zhang     if (B->structure_only) {
32859566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(nz, &b->j));
32869566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(B->rmap->N + 1, &b->i));
3287672ba085SHong Zhang     } else {
32886679dcc1SBarry Smith       PetscInt nzbs2 = 0;
32899566063dSJacob Faibussowitsch       PetscCall(PetscIntMultError(nz, bs2, &nzbs2));
32909566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(nzbs2, &b->a, nz, &b->j, B->rmap->N + 1, &b->i));
32919566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(b->a, nz * bs2));
3292672ba085SHong Zhang     }
32939566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(b->j, nz));
329426fbe8dcSKarl Rupp 
3295672ba085SHong Zhang     if (B->structure_only) {
3296672ba085SHong Zhang       b->singlemalloc = PETSC_FALSE;
3297672ba085SHong Zhang       b->free_a       = PETSC_FALSE;
3298672ba085SHong Zhang     } else {
3299a23d5eceSKris Buschelman       b->singlemalloc = PETSC_TRUE;
3300672ba085SHong Zhang       b->free_a       = PETSC_TRUE;
3301672ba085SHong Zhang     }
3302672ba085SHong Zhang     b->free_ij = PETSC_TRUE;
3303672ba085SHong Zhang 
3304a23d5eceSKris Buschelman     b->i[0] = 0;
3305ad540459SPierre Jolivet     for (i = 1; i < mbs + 1; i++) b->i[i] = b->i[i - 1] + b->imax[i - 1];
3306672ba085SHong Zhang 
3307e811da20SHong Zhang   } else {
3308e6b907acSBarry Smith     b->free_a  = PETSC_FALSE;
3309e6b907acSBarry Smith     b->free_ij = PETSC_FALSE;
3310ab93d7beSBarry Smith   }
3311a23d5eceSKris Buschelman 
3312a23d5eceSKris Buschelman   b->bs2              = bs2;
3313a23d5eceSKris Buschelman   b->mbs              = mbs;
3314a23d5eceSKris Buschelman   b->nz               = 0;
3315b32cb4a7SJed Brown   b->maxnz            = nz;
3316b32cb4a7SJed Brown   B->info.nz_unneeded = (PetscReal)b->maxnz * bs2;
3317cb7b82ddSBarry Smith   B->was_assembled    = PETSC_FALSE;
3318cb7b82ddSBarry Smith   B->assembled        = PETSC_FALSE;
33199566063dSJacob Faibussowitsch   if (realalloc) PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3320a23d5eceSKris Buschelman   PetscFunctionReturn(0);
3321a23d5eceSKris Buschelman }
3322a23d5eceSKris Buschelman 
3323d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
3324d71ae5a4SJacob Faibussowitsch {
3325725b52f3SLisandro Dalcin   PetscInt     i, m, nz, nz_max = 0, *nnz;
3326f4259b30SLisandro Dalcin   PetscScalar *values      = NULL;
3327d47bf9aaSJed Brown   PetscBool    roworiented = ((Mat_SeqBAIJ *)B->data)->roworiented;
3328725b52f3SLisandro Dalcin 
3329725b52f3SLisandro Dalcin   PetscFunctionBegin;
33305f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
33319566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
33329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
33339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
33349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
33359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
3336d0f46423SBarry Smith   m = B->rmap->n / bs;
3337725b52f3SLisandro Dalcin 
33385f80ce2aSJacob Faibussowitsch   PetscCheck(ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
33399566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &nnz));
3340725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3341cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
33425f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
3343725b52f3SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
3344725b52f3SLisandro Dalcin     nnz[i] = nz;
3345725b52f3SLisandro Dalcin   }
33469566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, 0, nnz));
33479566063dSJacob Faibussowitsch   PetscCall(PetscFree(nnz));
3348725b52f3SLisandro Dalcin 
3349725b52f3SLisandro Dalcin   values = (PetscScalar *)V;
335048a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * (nz_max + 1), &values));
3351725b52f3SLisandro Dalcin   for (i = 0; i < m; i++) {
3352cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
3353cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
3354bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) {
3355cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
33569566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, ncols, icols, svals, INSERT_VALUES));
33573adadaf3SJed Brown     } else {
33583adadaf3SJed Brown       PetscInt j;
33593adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
33603adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
33619566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_SeqBAIJ(B, 1, &i, 1, &icols[j], svals, INSERT_VALUES));
33623adadaf3SJed Brown       }
33633adadaf3SJed Brown     }
3364725b52f3SLisandro Dalcin   }
33659566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
33669566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
33679566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
33689566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
3369725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3370725b52f3SLisandro Dalcin }
3371725b52f3SLisandro Dalcin 
3372cda14afcSprj- /*@C
337311a5261eSBarry Smith    MatSeqBAIJGetArray - gives read/write access to the array where the data for a `MATSEQBAIJ` matrix is stored
3374cda14afcSprj- 
3375cda14afcSprj-    Not Collective
3376cda14afcSprj- 
3377cda14afcSprj-    Input Parameter:
337811a5261eSBarry Smith .  mat - a `MATSEQBAIJ` matrix
3379cda14afcSprj- 
3380cda14afcSprj-    Output Parameter:
3381cda14afcSprj- .   array - pointer to the data
3382cda14afcSprj- 
3383cda14afcSprj-    Level: intermediate
3384cda14afcSprj- 
338511a5261eSBarry Smith .seealso: `MATSEQBAIJ`, `MatSeqBAIJRestoreArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3386cda14afcSprj- @*/
3387d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJGetArray(Mat A, PetscScalar **array)
3388d71ae5a4SJacob Faibussowitsch {
3389cda14afcSprj-   PetscFunctionBegin;
3390cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJGetArray_C", (Mat, PetscScalar **), (A, array));
3391cda14afcSprj-   PetscFunctionReturn(0);
3392cda14afcSprj- }
3393cda14afcSprj- 
3394cda14afcSprj- /*@C
339511a5261eSBarry Smith    MatSeqBAIJRestoreArray - returns access to the array where the data for a `MATSEQBAIJ` matrix is stored obtained by `MatSeqBAIJGetArray()`
3396cda14afcSprj- 
3397cda14afcSprj-    Not Collective
3398cda14afcSprj- 
3399cda14afcSprj-    Input Parameters:
340011a5261eSBarry Smith +  mat - a `MATSEQBAIJ` matrix
3401cda14afcSprj- -  array - pointer to the data
3402cda14afcSprj- 
3403cda14afcSprj-    Level: intermediate
3404cda14afcSprj- 
3405db781477SPatrick Sanan .seealso: `MatSeqBAIJGetArray()`, `MatSeqAIJGetArray()`, `MatSeqAIJRestoreArray()`
3406cda14afcSprj- @*/
3407d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJRestoreArray(Mat A, PetscScalar **array)
3408d71ae5a4SJacob Faibussowitsch {
3409cda14afcSprj-   PetscFunctionBegin;
3410cac4c232SBarry Smith   PetscUseMethod(A, "MatSeqBAIJRestoreArray_C", (Mat, PetscScalar **), (A, array));
3411cda14afcSprj-   PetscFunctionReturn(0);
3412cda14afcSprj- }
3413cda14afcSprj- 
/*MC
   MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
   block sparse compressed row format.

   Options Database Keys:
+ -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to `MatSetFromOptions()`
- -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)

   Level: beginner

   Notes:
    `MatSetOption`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored.

   Run with -info to see what version of the matrix-vector product is being used.

.seealso: `MatCreateSeqBAIJ()`
M*/
34320bad9183SKris Buschelman 
3433cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType, MatReuse, Mat *);
3434b24902e0SBarry Smith 
3435d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3436d71ae5a4SJacob Faibussowitsch {
3437c1ac3661SBarry Smith   PetscMPIInt  size;
3438b6490206SBarry Smith   Mat_SeqBAIJ *b;
34393b2fbd54SBarry Smith 
34403a40ed3dSBarry Smith   PetscFunctionBegin;
34419566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
34425f80ce2aSJacob Faibussowitsch   PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Comm must be of size 1");
3443b6490206SBarry Smith 
34444dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
3445b0a32e0cSBarry Smith   B->data = (void *)b;
34469566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
344726fbe8dcSKarl Rupp 
3448f4259b30SLisandro Dalcin   b->row          = NULL;
3449f4259b30SLisandro Dalcin   b->col          = NULL;
3450f4259b30SLisandro Dalcin   b->icol         = NULL;
34512593348eSBarry Smith   b->reallocs     = 0;
3452f4259b30SLisandro Dalcin   b->saved_values = NULL;
34532593348eSBarry Smith 
3454c4992f7dSBarry Smith   b->roworiented        = PETSC_TRUE;
34552593348eSBarry Smith   b->nonew              = 0;
3456f4259b30SLisandro Dalcin   b->diag               = NULL;
3457f4259b30SLisandro Dalcin   B->spptr              = NULL;
3458b32cb4a7SJed Brown   B->info.nz_unneeded   = (PetscReal)b->maxnz * b->bs2;
3459a9817697SBarry Smith   b->keepnonzeropattern = PETSC_FALSE;
34604e220ebcSLois Curfman McInnes 
34619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJGetArray_C", MatSeqBAIJGetArray_SeqBAIJ));
34629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJRestoreArray_C", MatSeqBAIJRestoreArray_SeqBAIJ));
34639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_SeqBAIJ));
34649566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_SeqBAIJ));
34659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetColumnIndices_C", MatSeqBAIJSetColumnIndices_SeqBAIJ));
34669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqaij_C", MatConvert_SeqBAIJ_SeqAIJ));
34679566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_seqsbaij_C", MatConvert_SeqBAIJ_SeqSBAIJ));
34689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocation_C", MatSeqBAIJSetPreallocation_SeqBAIJ));
34699566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSeqBAIJSetPreallocationCSR_C", MatSeqBAIJSetPreallocationCSR_SeqBAIJ));
34709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_SeqBAIJ));
34717ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
34729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_hypre_C", MatConvert_AIJ_HYPRE));
34737ea3e4caSstefano_zampini #endif
34749566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqbaij_is_C", MatConvert_XAIJ_IS));
34759566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQBAIJ));
34763a40ed3dSBarry Smith   PetscFunctionReturn(0);
34772593348eSBarry Smith }
34782593348eSBarry Smith 
3479d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C, Mat A, MatDuplicateOption cpvalues, PetscBool mallocmatspace)
3480d71ae5a4SJacob Faibussowitsch {
3481b24902e0SBarry Smith   Mat_SeqBAIJ *c = (Mat_SeqBAIJ *)C->data, *a = (Mat_SeqBAIJ *)A->data;
3482a96a251dSBarry Smith   PetscInt     i, mbs = a->mbs, nz = a->nz, bs2 = a->bs2;
3483de6a44a3SBarry Smith 
34843a40ed3dSBarry Smith   PetscFunctionBegin;
34855f80ce2aSJacob Faibussowitsch   PetscCheck(a->i[mbs] == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupt matrix");
34862593348eSBarry Smith 
34874fd072dbSBarry Smith   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
34884fd072dbSBarry Smith     c->imax           = a->imax;
34894fd072dbSBarry Smith     c->ilen           = a->ilen;
34904fd072dbSBarry Smith     c->free_imax_ilen = PETSC_FALSE;
34914fd072dbSBarry Smith   } else {
34929566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(mbs, &c->imax, mbs, &c->ilen));
3493b6490206SBarry Smith     for (i = 0; i < mbs; i++) {
34942593348eSBarry Smith       c->imax[i] = a->imax[i];
34952593348eSBarry Smith       c->ilen[i] = a->ilen[i];
34962593348eSBarry Smith     }
34974fd072dbSBarry Smith     c->free_imax_ilen = PETSC_TRUE;
34984fd072dbSBarry Smith   }
34992593348eSBarry Smith 
35002593348eSBarry Smith   /* allocate the matrix space */
350116a2bf60SHong Zhang   if (mallocmatspace) {
35024fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35039566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(bs2 * nz, &c->a));
350426fbe8dcSKarl Rupp 
35054fd072dbSBarry Smith       c->i            = a->i;
35064fd072dbSBarry Smith       c->j            = a->j;
3507379be0ddSLisandro Dalcin       c->singlemalloc = PETSC_FALSE;
3508379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
3509379be0ddSLisandro Dalcin       c->free_ij      = PETSC_FALSE;
35104fd072dbSBarry Smith       c->parent       = A;
35111e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35121e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
351326fbe8dcSKarl Rupp 
35149566063dSJacob Faibussowitsch       PetscCall(PetscObjectReference((PetscObject)A));
35159566063dSJacob Faibussowitsch       PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35169566063dSJacob Faibussowitsch       PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
35174fd072dbSBarry Smith     } else {
35189566063dSJacob Faibussowitsch       PetscCall(PetscMalloc3(bs2 * nz, &c->a, nz, &c->j, mbs + 1, &c->i));
351926fbe8dcSKarl Rupp 
3520c4992f7dSBarry Smith       c->singlemalloc = PETSC_TRUE;
3521379be0ddSLisandro Dalcin       c->free_a       = PETSC_TRUE;
35224fd072dbSBarry Smith       c->free_ij      = PETSC_TRUE;
352326fbe8dcSKarl Rupp 
35249566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(c->i, a->i, mbs + 1));
3525b6490206SBarry Smith       if (mbs > 0) {
35269566063dSJacob Faibussowitsch         PetscCall(PetscArraycpy(c->j, a->j, nz));
35272e8a6d31SBarry Smith         if (cpvalues == MAT_COPY_VALUES) {
35289566063dSJacob Faibussowitsch           PetscCall(PetscArraycpy(c->a, a->a, bs2 * nz));
35292e8a6d31SBarry Smith         } else {
35309566063dSJacob Faibussowitsch           PetscCall(PetscArrayzero(c->a, bs2 * nz));
35312593348eSBarry Smith         }
35322593348eSBarry Smith       }
35331e40a84eSLisandro Dalcin       C->preallocated = PETSC_TRUE;
35341e40a84eSLisandro Dalcin       C->assembled    = PETSC_TRUE;
353516a2bf60SHong Zhang     }
35364fd072dbSBarry Smith   }
353716a2bf60SHong Zhang 
35382593348eSBarry Smith   c->roworiented = a->roworiented;
35392593348eSBarry Smith   c->nonew       = a->nonew;
354026fbe8dcSKarl Rupp 
35419566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->rmap, &C->rmap));
35429566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(A->cmap, &C->cmap));
354326fbe8dcSKarl Rupp 
35445c9eb25fSBarry Smith   c->bs2 = a->bs2;
35455c9eb25fSBarry Smith   c->mbs = a->mbs;
35465c9eb25fSBarry Smith   c->nbs = a->nbs;
35472593348eSBarry Smith 
35482593348eSBarry Smith   if (a->diag) {
35494fd072dbSBarry Smith     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
35504fd072dbSBarry Smith       c->diag      = a->diag;
35514fd072dbSBarry Smith       c->free_diag = PETSC_FALSE;
35524fd072dbSBarry Smith     } else {
35539566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mbs + 1, &c->diag));
355426fbe8dcSKarl Rupp       for (i = 0; i < mbs; i++) c->diag[i] = a->diag[i];
35554fd072dbSBarry Smith       c->free_diag = PETSC_TRUE;
35564fd072dbSBarry Smith     }
3557f4259b30SLisandro Dalcin   } else c->diag = NULL;
355826fbe8dcSKarl Rupp 
35592593348eSBarry Smith   c->nz         = a->nz;
3560f2cbd3d5SJed Brown   c->maxnz      = a->nz; /* Since we allocate exactly the right amount */
3561f361c04dSBarry Smith   c->solve_work = NULL;
3562f361c04dSBarry Smith   c->mult_work  = NULL;
3563f361c04dSBarry Smith   c->sor_workt  = NULL;
3564f361c04dSBarry Smith   c->sor_work   = NULL;
356588e51ccdSHong Zhang 
356688e51ccdSHong Zhang   c->compressedrow.use   = a->compressedrow.use;
356788e51ccdSHong Zhang   c->compressedrow.nrows = a->compressedrow.nrows;
3568cd6b891eSBarry Smith   if (a->compressedrow.use) {
356988e51ccdSHong Zhang     i = a->compressedrow.nrows;
35709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(i + 1, &c->compressedrow.i, i + 1, &c->compressedrow.rindex));
35719566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.i, a->compressedrow.i, i + 1));
35729566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(c->compressedrow.rindex, a->compressedrow.rindex, i));
357388e51ccdSHong Zhang   } else {
357488e51ccdSHong Zhang     c->compressedrow.use    = PETSC_FALSE;
35750298fd71SBarry Smith     c->compressedrow.i      = NULL;
35760298fd71SBarry Smith     c->compressedrow.rindex = NULL;
357788e51ccdSHong Zhang   }
3578e56f5c9eSBarry Smith   C->nonzerostate = A->nonzerostate;
357926fbe8dcSKarl Rupp 
35809566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)A)->qlist, &((PetscObject)C)->qlist));
35813a40ed3dSBarry Smith   PetscFunctionReturn(0);
35822593348eSBarry Smith }
35832593348eSBarry Smith 
3584d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDuplicate_SeqBAIJ(Mat A, MatDuplicateOption cpvalues, Mat *B)
3585d71ae5a4SJacob Faibussowitsch {
3586b24902e0SBarry Smith   PetscFunctionBegin;
35879566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), B));
35889566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*B, A->rmap->N, A->cmap->n, A->rmap->N, A->cmap->n));
35899566063dSJacob Faibussowitsch   PetscCall(MatSetType(*B, MATSEQBAIJ));
35909566063dSJacob Faibussowitsch   PetscCall(MatDuplicateNoCreate_SeqBAIJ(*B, A, cpvalues, PETSC_TRUE));
3591b24902e0SBarry Smith   PetscFunctionReturn(0);
3592b24902e0SBarry Smith }
3593b24902e0SBarry Smith 
3594618cc2edSLisandro Dalcin /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3595d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat, PetscViewer viewer)
3596d71ae5a4SJacob Faibussowitsch {
3597b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3598b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs;
3599b51a4376SLisandro Dalcin   PetscScalar *matvals;
3600b51a4376SLisandro Dalcin 
3601b51a4376SLisandro Dalcin   PetscFunctionBegin;
36029566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3603b51a4376SLisandro Dalcin 
3604b51a4376SLisandro Dalcin   /* read matrix header */
36059566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
36065f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
36079371c9d4SSatish Balay   M  = header[1];
36089371c9d4SSatish Balay   N  = header[2];
36099371c9d4SSatish Balay   nz = header[3];
36105f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
36115f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
36125f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as SeqBAIJ");
3613b51a4376SLisandro Dalcin 
3614b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
36159566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3616b51a4376SLisandro Dalcin   /* set local and global sizes if not set already */
3617b51a4376SLisandro Dalcin   if (mat->rmap->n < 0) mat->rmap->n = M;
3618b51a4376SLisandro Dalcin   if (mat->cmap->n < 0) mat->cmap->n = N;
3619b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3620b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
36219566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
36229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3623b51a4376SLisandro Dalcin 
3624b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
36259566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
36265f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
36279566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
36289566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
36299371c9d4SSatish Balay   mbs = m / bs;
36309371c9d4SSatish Balay   nbs = n / bs;
3631b51a4376SLisandro Dalcin 
3632b51a4376SLisandro Dalcin   /* read in row lengths, column indices and nonzero values */
36339566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
36349566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, rowidxs + 1, m, NULL, PETSC_INT));
36359371c9d4SSatish Balay   rowidxs[0] = 0;
36369371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
3637b51a4376SLisandro Dalcin   sum = rowidxs[m];
36385f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3639b51a4376SLisandro Dalcin 
3640b51a4376SLisandro Dalcin   /* read in column indices and nonzero values */
36419566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, nz, &matvals));
36429566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, colidxs, rowidxs[m], NULL, PETSC_INT));
36439566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, matvals, rowidxs[m], NULL, PETSC_SCALAR));
3644b51a4376SLisandro Dalcin 
3645b51a4376SLisandro Dalcin   {               /* preallocate matrix storage */
3646b51a4376SLisandro Dalcin     PetscBT   bt; /* helper bit set to count nonzeros */
3647b51a4376SLisandro Dalcin     PetscInt *nnz;
3648618cc2edSLisandro Dalcin     PetscBool sbaij;
3649b51a4376SLisandro Dalcin 
36509566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
36519566063dSJacob Faibussowitsch     PetscCall(PetscCalloc1(mbs, &nnz));
36529566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSBAIJ, &sbaij));
3653b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
36549566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
3655618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3656618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3657618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3658618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3659618cc2edSLisandro Dalcin           if (!sbaij || col >= row)
3660618cc2edSLisandro Dalcin             if (!PetscBTLookupSet(bt, col / bs)) nnz[i]++;
3661618cc2edSLisandro Dalcin         }
3662618cc2edSLisandro Dalcin       }
3663b51a4376SLisandro Dalcin     }
36649566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
36659566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(mat, bs, 0, nnz));
36669566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(mat, bs, 0, nnz));
36679566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz));
3668b51a4376SLisandro Dalcin   }
3669b51a4376SLisandro Dalcin 
3670b51a4376SLisandro Dalcin   /* store matrix values */
3671b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3672b51a4376SLisandro Dalcin     PetscInt row = i, s = rowidxs[i], e = rowidxs[i + 1];
36739566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3674b51a4376SLisandro Dalcin   }
3675b51a4376SLisandro Dalcin 
36769566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
36779566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
36789566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
36799566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3680b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3681b51a4376SLisandro Dalcin }
3682b51a4376SLisandro Dalcin 
3683d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_SeqBAIJ(Mat mat, PetscViewer viewer)
3684d71ae5a4SJacob Faibussowitsch {
36857f489da9SVaclav Hapla   PetscBool isbinary;
3686f501eaabSShri Abhyankar 
3687f501eaabSShri Abhyankar   PetscFunctionBegin;
36889566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
36895f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
36909566063dSJacob Faibussowitsch   PetscCall(MatLoad_SeqBAIJ_Binary(mat, viewer));
3691f501eaabSShri Abhyankar   PetscFunctionReturn(0);
3692f501eaabSShri Abhyankar }
3693f501eaabSShri Abhyankar 
3694273d9f13SBarry Smith /*@C
369511a5261eSBarry Smith    MatCreateSeqBAIJ - Creates a sparse matrix in `MATSEQAIJ` (block
3696273d9f13SBarry Smith    compressed row) format.  For good matrix assembly performance the
3697273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3698273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3699273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
37002593348eSBarry Smith 
3701d083f849SBarry Smith    Collective
3702273d9f13SBarry Smith 
3703273d9f13SBarry Smith    Input Parameters:
370411a5261eSBarry Smith +  comm - MPI communicator, set to `PETSC_COMM_SELF`
370511a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
370611a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3707273d9f13SBarry Smith .  m - number of rows
3708273d9f13SBarry Smith .  n - number of columns
370935d8aa7fSBarry Smith .  nz - number of nonzero blocks  per block row (same for all rows)
371035d8aa7fSBarry Smith -  nnz - array containing the number of nonzero blocks in the various block rows
37110298fd71SBarry Smith          (possibly different for each block row) or NULL
3712273d9f13SBarry Smith 
3713273d9f13SBarry Smith    Output Parameter:
3714273d9f13SBarry Smith .  A - the matrix
3715273d9f13SBarry Smith 
371611a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3717f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
371811a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
3719175b88e8SBarry Smith 
3720273d9f13SBarry Smith    Options Database Keys:
372111a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3722a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3723273d9f13SBarry Smith 
3724273d9f13SBarry Smith    Level: intermediate
3725273d9f13SBarry Smith 
3726273d9f13SBarry Smith    Notes:
3727d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
3728d1be2dadSMatthew Knepley 
372949a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
373049a6f317SBarry Smith 
373135d8aa7fSBarry Smith    A nonzero block is any block that as 1 or more nonzeros in it
373235d8aa7fSBarry Smith 
373311a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3734273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3735273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3736273d9f13SBarry Smith 
3737273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
373811a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3739651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3740273d9f13SBarry Smith    matrices.
3741273d9f13SBarry Smith 
3742651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
3743273d9f13SBarry Smith @*/
3744d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
3745d71ae5a4SJacob Faibussowitsch {
3746273d9f13SBarry Smith   PetscFunctionBegin;
37479566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
37489566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, m, n));
37499566063dSJacob Faibussowitsch   PetscCall(MatSetType(*A, MATSEQBAIJ));
37509566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*A, bs, nz, (PetscInt *)nnz));
3751273d9f13SBarry Smith   PetscFunctionReturn(0);
3752273d9f13SBarry Smith }
3753273d9f13SBarry Smith 
3754273d9f13SBarry Smith /*@C
3755273d9f13SBarry Smith    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3756273d9f13SBarry Smith    per row in the matrix. For good matrix assembly performance the
3757273d9f13SBarry Smith    user should preallocate the matrix storage by setting the parameter nz
3758273d9f13SBarry Smith    (or the array nnz).  By setting these parameters accurately, performance
3759273d9f13SBarry Smith    during matrix assembly can be increased by more than a factor of 50.
3760273d9f13SBarry Smith 
3761d083f849SBarry Smith    Collective
3762273d9f13SBarry Smith 
3763273d9f13SBarry Smith    Input Parameters:
37641c4f3114SJed Brown +  B - the matrix
376511a5261eSBarry Smith .  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
376611a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
3767273d9f13SBarry Smith .  nz - number of block nonzeros per block row (same for all rows)
3768273d9f13SBarry Smith -  nnz - array containing the number of block nonzeros in the various block rows
37690298fd71SBarry Smith          (possibly different for each block row) or NULL
3770273d9f13SBarry Smith 
3771273d9f13SBarry Smith    Options Database Keys:
377211a5261eSBarry Smith +   -mat_no_unroll - uses code that does not unroll the loops in the block calculations (much slower)
3773a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
3774273d9f13SBarry Smith 
3775273d9f13SBarry Smith    Level: intermediate
3776273d9f13SBarry Smith 
3777273d9f13SBarry Smith    Notes:
377849a6f317SBarry Smith    If the nnz parameter is given then the nz parameter is ignored
377949a6f317SBarry Smith 
378011a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
3781aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3782aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
3783aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
3784aa95bbe8SBarry Smith 
378511a5261eSBarry Smith    The `MATSEQBAIJ` format is fully compatible with standard Fortran 77
3786273d9f13SBarry Smith    storage.  That is, the stored row and column indices can begin at
3787273d9f13SBarry Smith    either one (as in Fortran) or zero.  See the users' manual for details.
3788273d9f13SBarry Smith 
3789273d9f13SBarry Smith    Specify the preallocated storage with either nz or nnz (not both).
379011a5261eSBarry Smith    Set nz = `PETSC_DEFAULT` and nnz = NULL for PETSc to control dynamic memory
3791651615e1SBarry Smith    allocation.  See [Sparse Matrices](sec_matsparse) for details.
3792273d9f13SBarry Smith 
3793651615e1SBarry Smith .seealso: [Sparse Matrices](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatGetInfo()`
3794273d9f13SBarry Smith @*/
3795d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt nz, const PetscInt nnz[])
3796d71ae5a4SJacob Faibussowitsch {
3797273d9f13SBarry Smith   PetscFunctionBegin;
37986ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
37996ba663aaSJed Brown   PetscValidType(B, 1);
38006ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3801cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[]), (B, bs, nz, nnz));
3802273d9f13SBarry Smith   PetscFunctionReturn(0);
3803273d9f13SBarry Smith }
3804a1d92eedSBarry Smith 
3805725b52f3SLisandro Dalcin /*@C
380611a5261eSBarry Smith    MatSeqBAIJSetPreallocationCSR - Creates a sparse sequential matrix in `MATSEQBAIJ` format using the given nonzero structure and (optional) numerical values
3807725b52f3SLisandro Dalcin 
3808d083f849SBarry Smith    Collective
3809725b52f3SLisandro Dalcin 
3810725b52f3SLisandro Dalcin    Input Parameters:
38111c4f3114SJed Brown +  B - the matrix
3812725b52f3SLisandro Dalcin .  i - the indices into j for the start of each local row (starts with zero)
3813725b52f3SLisandro Dalcin .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3814725b52f3SLisandro Dalcin -  v - optional values in the matrix
3815725b52f3SLisandro Dalcin 
3816664954b6SBarry Smith    Level: advanced
3817725b52f3SLisandro Dalcin 
38183adadaf3SJed Brown    Notes:
381911a5261eSBarry Smith    The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
382011a5261eSBarry Smith    may want to use the default `MAT_ROW_ORIENTED` of `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
38213adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
382211a5261eSBarry Smith    `MAT_ROW_ORIENTED` of `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
38233adadaf3SJed Brown    block column and the second index is over columns within a block.
38243adadaf3SJed Brown 
3825664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3826664954b6SBarry Smith 
3827db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatSeqBAIJSetPreallocation()`, `MATSEQBAIJ`
3828725b52f3SLisandro Dalcin @*/
3829d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
3830d71ae5a4SJacob Faibussowitsch {
3831725b52f3SLisandro Dalcin   PetscFunctionBegin;
38326ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
38336ba663aaSJed Brown   PetscValidType(B, 1);
38346ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
3835cac4c232SBarry Smith   PetscTryMethod(B, "MatSeqBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
3836725b52f3SLisandro Dalcin   PetscFunctionReturn(0);
3837725b52f3SLisandro Dalcin }
3838725b52f3SLisandro Dalcin 
3839c75a6043SHong Zhang /*@
384011a5261eSBarry Smith      MatCreateSeqBAIJWithArrays - Creates a `MATSEQBAIJ` matrix using matrix elements provided by the user.
3841c75a6043SHong Zhang 
3842d083f849SBarry Smith      Collective
3843c75a6043SHong Zhang 
3844c75a6043SHong Zhang    Input Parameters:
3845c75a6043SHong Zhang +  comm - must be an MPI communicator of size 1
3846c75a6043SHong Zhang .  bs - size of block
3847c75a6043SHong Zhang .  m - number of rows
3848c75a6043SHong Zhang .  n - number of columns
3849483a2f95SBarry Smith .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3850c75a6043SHong Zhang .  j - column indices
3851c75a6043SHong Zhang -  a - matrix values
3852c75a6043SHong Zhang 
3853c75a6043SHong Zhang    Output Parameter:
3854c75a6043SHong Zhang .  mat - the matrix
3855c75a6043SHong Zhang 
3856dfb205c3SBarry Smith    Level: advanced
3857c75a6043SHong Zhang 
3858c75a6043SHong Zhang    Notes:
3859c75a6043SHong Zhang        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3860c75a6043SHong Zhang     once the matrix is destroyed
3861c75a6043SHong Zhang 
3862c75a6043SHong Zhang        You cannot set new nonzero locations into this matrix, that will generate an error.
3863c75a6043SHong Zhang 
3864c75a6043SHong Zhang        The i and j indices are 0 based
3865c75a6043SHong Zhang 
386611a5261eSBarry Smith        When block size is greater than 1 the matrix values must be stored using the `MATSEQBAIJ` storage format
3867dfb205c3SBarry Smith 
38683adadaf3SJed Brown       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
38693adadaf3SJed Brown       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
38703adadaf3SJed Brown       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
38713adadaf3SJed Brown       with column-major ordering within blocks.
3872dfb205c3SBarry Smith 
3873db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateBAIJ()`, `MatCreateSeqBAIJ()`
3874c75a6043SHong Zhang @*/
3875d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt i[], PetscInt j[], PetscScalar a[], Mat *mat)
3876d71ae5a4SJacob Faibussowitsch {
3877c75a6043SHong Zhang   PetscInt     ii;
3878c75a6043SHong Zhang   Mat_SeqBAIJ *baij;
3879c75a6043SHong Zhang 
3880c75a6043SHong Zhang   PetscFunctionBegin;
38815f80ce2aSJacob Faibussowitsch   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "block size %" PetscInt_FMT " > 1 is not supported yet", bs);
38825f80ce2aSJacob Faibussowitsch   if (m > 0) PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3883c75a6043SHong Zhang 
38849566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
38859566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, m, n));
38869566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATSEQBAIJ));
38879566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(*mat, bs, MAT_SKIP_ALLOCATION, NULL));
3888c75a6043SHong Zhang   baij = (Mat_SeqBAIJ *)(*mat)->data;
38899566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &baij->imax, m, &baij->ilen));
3890c75a6043SHong Zhang 
3891c75a6043SHong Zhang   baij->i = i;
3892c75a6043SHong Zhang   baij->j = j;
3893c75a6043SHong Zhang   baij->a = a;
389426fbe8dcSKarl Rupp 
3895c75a6043SHong Zhang   baij->singlemalloc = PETSC_FALSE;
3896c75a6043SHong Zhang   baij->nonew        = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3897e6b907acSBarry Smith   baij->free_a       = PETSC_FALSE;
3898e6b907acSBarry Smith   baij->free_ij      = PETSC_FALSE;
3899c75a6043SHong Zhang 
3900c75a6043SHong Zhang   for (ii = 0; ii < m; ii++) {
3901c75a6043SHong Zhang     baij->ilen[ii] = baij->imax[ii] = i[ii + 1] - i[ii];
39026bdcaf15SBarry Smith     PetscCheck(i[ii + 1] - i[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row length in i (row indices) row = %" PetscInt_FMT " length = %" PetscInt_FMT, ii, i[ii + 1] - i[ii]);
3903c75a6043SHong Zhang   }
390476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
3905c75a6043SHong Zhang     for (ii = 0; ii < baij->i[m]; ii++) {
39066bdcaf15SBarry Smith       PetscCheck(j[ii] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative column index at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
39076bdcaf15SBarry Smith       PetscCheck(j[ii] <= n - 1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index to large at location = %" PetscInt_FMT " index = %" PetscInt_FMT, ii, j[ii]);
3908c75a6043SHong Zhang     }
390976bd3646SJed Brown   }
3910c75a6043SHong Zhang 
39119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
39129566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
3913c75a6043SHong Zhang   PetscFunctionReturn(0);
3914c75a6043SHong Zhang }
3915bdf6f3fcSHong Zhang 
3916d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3917d71ae5a4SJacob Faibussowitsch {
3918bdf6f3fcSHong Zhang   PetscFunctionBegin;
39199566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm, inmat, n, scall, outmat));
3920bdf6f3fcSHong Zhang   PetscFunctionReturn(0);
3921bdf6f3fcSHong Zhang }
3922