xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision dec0b4665a36c357901b00de474573dbc58fdd79)
1a30f8f8cSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
6a30f8f8cSSatish Balay 
76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
96214f412SHong Zhang #endif
10d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
11d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
12d24d4204SJose E. Roman #endif
13b147fbf3SStefano Zampini 
14b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
15d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16d71ae5a4SJacob Faibussowitsch {
17b147fbf3SStefano Zampini   Mat       preallocator;
18b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
19b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
20b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
21b147fbf3SStefano Zampini 
22b147fbf3SStefano Zampini   PetscFunctionBegin;
23b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
24b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
25b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
26b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
279566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
285f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
29b147fbf3SStefano Zampini   }
30b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
319566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
329566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
339566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
349566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
359566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
369566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
379566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
389566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
399566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
40b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
41b147fbf3SStefano Zampini     PetscInt           ncols;
42b147fbf3SStefano Zampini     const PetscInt    *row;
43b147fbf3SStefano Zampini     const PetscScalar *vals;
44b147fbf3SStefano Zampini 
45b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
469566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
479566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
4848a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
499566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
50b147fbf3SStefano Zampini     }
51b147fbf3SStefano Zampini   }
529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
549566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
559566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
56b147fbf3SStefano Zampini   PetscFunctionReturn(0);
57b147fbf3SStefano Zampini }
58b147fbf3SStefano Zampini 
59d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60d71ae5a4SJacob Faibussowitsch {
61b147fbf3SStefano Zampini   Mat      B;
62b147fbf3SStefano Zampini   PetscInt r;
63b147fbf3SStefano Zampini 
64b147fbf3SStefano Zampini   PetscFunctionBegin;
65b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
6628d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
67b147fbf3SStefano Zampini     PetscInt  bs;
68b147fbf3SStefano Zampini 
699566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
709566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
719566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
729566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
739566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
759566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
769566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
7728d58a37SPierre Jolivet     if (!isdense) {
789566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
799566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
809566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
8128d58a37SPierre Jolivet     } else {
829566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
8328d58a37SPierre Jolivet     }
8428d58a37SPierre Jolivet   } else {
8528d58a37SPierre Jolivet     B = *newmat;
869566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
8728d58a37SPierre Jolivet   }
88b147fbf3SStefano Zampini 
899566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
90b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
91b147fbf3SStefano Zampini     PetscInt           ncols;
92b147fbf3SStefano Zampini     const PetscInt    *row;
93b147fbf3SStefano Zampini     const PetscScalar *vals;
94b147fbf3SStefano Zampini 
959566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
969566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
97eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
98b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
99eb1ec7c1SStefano Zampini       PetscInt i;
10048a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
101eb1ec7c1SStefano Zampini     } else {
1029566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
103eb1ec7c1SStefano Zampini     }
104eb1ec7c1SStefano Zampini #else
1059566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
106eb1ec7c1SStefano Zampini #endif
1079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
108b147fbf3SStefano Zampini   }
1099566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
112b147fbf3SStefano Zampini 
113b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1149566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
115b147fbf3SStefano Zampini   } else {
116b147fbf3SStefano Zampini     *newmat = B;
117b147fbf3SStefano Zampini   }
118b147fbf3SStefano Zampini   PetscFunctionReturn(0);
119b147fbf3SStefano Zampini }
120b147fbf3SStefano Zampini 
121d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
122d71ae5a4SJacob Faibussowitsch {
123f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
124a30f8f8cSSatish Balay 
125a30f8f8cSSatish Balay   PetscFunctionBegin;
1269566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1279566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
128a30f8f8cSSatish Balay   PetscFunctionReturn(0);
129a30f8f8cSSatish Balay }
130a30f8f8cSSatish Balay 
131d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
132d71ae5a4SJacob Faibussowitsch {
133f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
134a30f8f8cSSatish Balay 
135a30f8f8cSSatish Balay   PetscFunctionBegin;
1369566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1379566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
138a30f8f8cSSatish Balay   PetscFunctionReturn(0);
139a30f8f8cSSatish Balay }
140a30f8f8cSSatish Balay 
141d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
142a30f8f8cSSatish Balay   { \
143a30f8f8cSSatish Balay     brow = row / bs; \
1449371c9d4SSatish Balay     rp   = aj + ai[brow]; \
1459371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
1469371c9d4SSatish Balay     rmax = aimax[brow]; \
1479371c9d4SSatish Balay     nrow = ailen[brow]; \
148a30f8f8cSSatish Balay     bcol = col / bs; \
1499371c9d4SSatish Balay     ridx = row % bs; \
1509371c9d4SSatish Balay     cidx = col % bs; \
1519371c9d4SSatish Balay     low  = 0; \
1529371c9d4SSatish Balay     high = nrow; \
153a30f8f8cSSatish Balay     while (high - low > 3) { \
154a30f8f8cSSatish Balay       t = (low + high) / 2; \
155a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
156a30f8f8cSSatish Balay       else low = t; \
157a30f8f8cSSatish Balay     } \
158a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
159a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
160a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
161a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
162a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
163a30f8f8cSSatish Balay         else *bap = value; \
164a30f8f8cSSatish Balay         goto a_noinsert; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay     } \
167a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
1685f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
170a30f8f8cSSatish Balay     N = nrow++ - 1; \
171a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
1729566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
1739566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
1749566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
175a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
176a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
177e56f5c9eSBarry Smith     A->nonzerostate++; \
178a30f8f8cSSatish Balay   a_noinsert:; \
179a30f8f8cSSatish Balay     ailen[brow] = nrow; \
180a30f8f8cSSatish Balay   }
181e5e170daSBarry Smith 
182d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
183a30f8f8cSSatish Balay   { \
184a30f8f8cSSatish Balay     brow = row / bs; \
1859371c9d4SSatish Balay     rp   = bj + bi[brow]; \
1869371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
1879371c9d4SSatish Balay     rmax = bimax[brow]; \
1889371c9d4SSatish Balay     nrow = bilen[brow]; \
189a30f8f8cSSatish Balay     bcol = col / bs; \
1909371c9d4SSatish Balay     ridx = row % bs; \
1919371c9d4SSatish Balay     cidx = col % bs; \
1929371c9d4SSatish Balay     low  = 0; \
1939371c9d4SSatish Balay     high = nrow; \
194a30f8f8cSSatish Balay     while (high - low > 3) { \
195a30f8f8cSSatish Balay       t = (low + high) / 2; \
196a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
197a30f8f8cSSatish Balay       else low = t; \
198a30f8f8cSSatish Balay     } \
199a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
200a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
201a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
202a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
203a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
204a30f8f8cSSatish Balay         else *bap = value; \
205a30f8f8cSSatish Balay         goto b_noinsert; \
206a30f8f8cSSatish Balay       } \
207a30f8f8cSSatish Balay     } \
208a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2095f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
210fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
211a30f8f8cSSatish Balay     N = nrow++ - 1; \
212a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2139566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2149566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2159566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
216a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
217a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
218e56f5c9eSBarry Smith     B->nonzerostate++; \
219a30f8f8cSSatish Balay   b_noinsert:; \
220a30f8f8cSSatish Balay     bilen[brow] = nrow; \
221a30f8f8cSSatish Balay   }
222a30f8f8cSSatish Balay 
223a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
224476417e5SBarry Smith    Any a(i,j) with i>j input by user is ingored or generates an error
225a30f8f8cSSatish Balay */
226d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
227d71ae5a4SJacob Faibussowitsch {
228a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
229a30f8f8cSSatish Balay   MatScalar     value;
230ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2311302d50aSBarry Smith   PetscInt      i, j, row, col;
232d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
233d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
234d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
235a30f8f8cSSatish Balay 
236a30f8f8cSSatish Balay   /* Some Variables required in the macro */
237a30f8f8cSSatish Balay   Mat           A     = baij->A;
238a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
2391302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
240a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
241a30f8f8cSSatish Balay 
242a30f8f8cSSatish Balay   Mat          B     = baij->B;
243a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
2441302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
245a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
246a30f8f8cSSatish Balay 
2471302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
2481302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
249a30f8f8cSSatish Balay   MatScalar *ap, *bap;
250a30f8f8cSSatish Balay 
251a30f8f8cSSatish Balay   /* for stash */
2520298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
2530298fd71SBarry Smith   MatScalar *v_loc = NULL;
254a30f8f8cSSatish Balay 
255a30f8f8cSSatish Balay   PetscFunctionBegin;
256a30f8f8cSSatish Balay   if (!baij->donotstash) {
25759ffdab8SBarry Smith     if (n > baij->n_loc) {
2589566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
2599566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
2609566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
2619566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
26226fbe8dcSKarl Rupp 
26359ffdab8SBarry Smith       baij->n_loc = n;
26459ffdab8SBarry Smith     }
26559ffdab8SBarry Smith     in_loc = baij->in_loc;
26659ffdab8SBarry Smith     v_loc  = baij->v_loc;
267a30f8f8cSSatish Balay   }
268a30f8f8cSSatish Balay 
269a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
270a30f8f8cSSatish Balay     if (im[i] < 0) continue;
2715f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
272a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
274a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
27501b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
27601b2bd88SHong Zhang           if (a->ignore_ltriangular) {
27701b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
27826fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
27901b2bd88SHong Zhang         }
280a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
281a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
2829371c9d4SSatish Balay           brow = row / bs;
2839371c9d4SSatish Balay           bcol = col / bs;
284a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
285db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
286db4deed7SKarl Rupp           else value = v[i + j * m];
287d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
2889566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
289f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
290f7d195e4SLawrence Mitchell           continue;
291f7d195e4SLawrence Mitchell         } else {
292f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
293f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
294a30f8f8cSSatish Balay           if (mat->was_assembled) {
29548a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
296a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2979566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
29871730473SSatish Balay             col = col - 1;
299a30f8f8cSSatish Balay #else
30071730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
301a30f8f8cSSatish Balay #endif
302a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3039566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
304a30f8f8cSSatish Balay               col = in[j];
305a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
306a30f8f8cSSatish Balay               B     = baij->B;
307a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3089371c9d4SSatish Balay               bimax = b->imax;
3099371c9d4SSatish Balay               bi    = b->i;
3109371c9d4SSatish Balay               bilen = b->ilen;
3119371c9d4SSatish Balay               bj    = b->j;
312a30f8f8cSSatish Balay               ba    = b->a;
31371730473SSatish Balay             } else col += in[j] % bs;
314a30f8f8cSSatish Balay           } else col = in[j];
315db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
316db4deed7SKarl Rupp           else value = v[i + j * m];
317d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3189566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
319a30f8f8cSSatish Balay         }
320a30f8f8cSSatish Balay       }
321a30f8f8cSSatish Balay     } else { /* off processor entry */
3225f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
323a30f8f8cSSatish Balay       if (!baij->donotstash) {
3245080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
325a30f8f8cSSatish Balay         n_loc          = 0;
326a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
327f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
328a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
329a30f8f8cSSatish Balay           if (roworiented) {
330a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
331a30f8f8cSSatish Balay           } else {
332a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
333a30f8f8cSSatish Balay           }
334a30f8f8cSSatish Balay           n_loc++;
335a30f8f8cSSatish Balay         }
3369566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
337a30f8f8cSSatish Balay       }
338a30f8f8cSSatish Balay     }
339a30f8f8cSSatish Balay   }
340a30f8f8cSSatish Balay   PetscFunctionReturn(0);
341a30f8f8cSSatish Balay }
342a30f8f8cSSatish Balay 
343d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
344d71ae5a4SJacob Faibussowitsch {
34536bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
34636bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
34736bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
34836bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
34936bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
35036bd2089SBarry Smith   const PetscScalar *value       = v;
35136bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
35236bd2089SBarry Smith 
35336bd2089SBarry Smith   PetscFunctionBegin;
35436bd2089SBarry Smith   if (col < row) {
35536bd2089SBarry Smith     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
35636bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
35736bd2089SBarry Smith   }
35836bd2089SBarry Smith   rp    = aj + ai[row];
35936bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
36036bd2089SBarry Smith   rmax  = imax[row];
36136bd2089SBarry Smith   nrow  = ailen[row];
36236bd2089SBarry Smith   value = v;
36336bd2089SBarry Smith   low   = 0;
36436bd2089SBarry Smith   high  = nrow;
36536bd2089SBarry Smith 
36636bd2089SBarry Smith   while (high - low > 7) {
36736bd2089SBarry Smith     t = (low + high) / 2;
36836bd2089SBarry Smith     if (rp[t] > col) high = t;
36936bd2089SBarry Smith     else low = t;
37036bd2089SBarry Smith   }
37136bd2089SBarry Smith   for (i = low; i < high; i++) {
37236bd2089SBarry Smith     if (rp[i] > col) break;
37336bd2089SBarry Smith     if (rp[i] == col) {
37436bd2089SBarry Smith       bap = ap + bs2 * i;
37536bd2089SBarry Smith       if (roworiented) {
37636bd2089SBarry Smith         if (is == ADD_VALUES) {
37736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
378ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
37936bd2089SBarry Smith           }
38036bd2089SBarry Smith         } else {
38136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
382ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
38336bd2089SBarry Smith           }
38436bd2089SBarry Smith         }
38536bd2089SBarry Smith       } else {
38636bd2089SBarry Smith         if (is == ADD_VALUES) {
38736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
388ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
38936bd2089SBarry Smith           }
39036bd2089SBarry Smith         } else {
39136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
392ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
39336bd2089SBarry Smith           }
39436bd2089SBarry Smith         }
39536bd2089SBarry Smith       }
39636bd2089SBarry Smith       goto noinsert2;
39736bd2089SBarry Smith     }
39836bd2089SBarry Smith   }
39936bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4005f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
40136bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4029371c9d4SSatish Balay   N = nrow++ - 1;
4039371c9d4SSatish Balay   high++;
40436bd2089SBarry Smith   /* shift up all the later entries in this row */
4059566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4069566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
40736bd2089SBarry Smith   rp[i] = col;
40836bd2089SBarry Smith   bap   = ap + bs2 * i;
40936bd2089SBarry Smith   if (roworiented) {
41036bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
411ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
41236bd2089SBarry Smith     }
41336bd2089SBarry Smith   } else {
41436bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
415ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
41636bd2089SBarry Smith     }
41736bd2089SBarry Smith   }
41836bd2089SBarry Smith noinsert2:;
41936bd2089SBarry Smith   ailen[row] = nrow;
42036bd2089SBarry Smith   PetscFunctionReturn(0);
42136bd2089SBarry Smith }
42236bd2089SBarry Smith 
42336bd2089SBarry Smith /*
42436bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
42536bd2089SBarry Smith */
426d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
427d71ae5a4SJacob Faibussowitsch {
42836bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
42936bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
43036bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
43136bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
43236bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
43336bd2089SBarry Smith   const PetscScalar *value       = v;
43436bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
43536bd2089SBarry Smith 
43636bd2089SBarry Smith   PetscFunctionBegin;
43736bd2089SBarry Smith   rp    = aj + ai[row];
43836bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
43936bd2089SBarry Smith   rmax  = imax[row];
44036bd2089SBarry Smith   nrow  = ailen[row];
44136bd2089SBarry Smith   low   = 0;
44236bd2089SBarry Smith   high  = nrow;
44336bd2089SBarry Smith   value = v;
44436bd2089SBarry Smith   while (high - low > 7) {
44536bd2089SBarry Smith     t = (low + high) / 2;
44636bd2089SBarry Smith     if (rp[t] > col) high = t;
44736bd2089SBarry Smith     else low = t;
44836bd2089SBarry Smith   }
44936bd2089SBarry Smith   for (i = low; i < high; i++) {
45036bd2089SBarry Smith     if (rp[i] > col) break;
45136bd2089SBarry Smith     if (rp[i] == col) {
45236bd2089SBarry Smith       bap = ap + bs2 * i;
45336bd2089SBarry Smith       if (roworiented) {
45436bd2089SBarry Smith         if (is == ADD_VALUES) {
45536bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
456ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
45736bd2089SBarry Smith           }
45836bd2089SBarry Smith         } else {
45936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
460ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
46136bd2089SBarry Smith           }
46236bd2089SBarry Smith         }
46336bd2089SBarry Smith       } else {
46436bd2089SBarry Smith         if (is == ADD_VALUES) {
46536bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
466ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
46736bd2089SBarry Smith             bap += bs;
46836bd2089SBarry Smith           }
46936bd2089SBarry Smith         } else {
47036bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
471ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
47236bd2089SBarry Smith             bap += bs;
47336bd2089SBarry Smith           }
47436bd2089SBarry Smith         }
47536bd2089SBarry Smith       }
47636bd2089SBarry Smith       goto noinsert2;
47736bd2089SBarry Smith     }
47836bd2089SBarry Smith   }
47936bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4805f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
48136bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4829371c9d4SSatish Balay   N = nrow++ - 1;
4839371c9d4SSatish Balay   high++;
48436bd2089SBarry Smith   /* shift up all the later entries in this row */
4859566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4869566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
48736bd2089SBarry Smith   rp[i] = col;
48836bd2089SBarry Smith   bap   = ap + bs2 * i;
48936bd2089SBarry Smith   if (roworiented) {
49036bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
491ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
49236bd2089SBarry Smith     }
49336bd2089SBarry Smith   } else {
49436bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
495ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
49636bd2089SBarry Smith     }
49736bd2089SBarry Smith   }
49836bd2089SBarry Smith noinsert2:;
49936bd2089SBarry Smith   ailen[row] = nrow;
50036bd2089SBarry Smith   PetscFunctionReturn(0);
50136bd2089SBarry Smith }
50236bd2089SBarry Smith 
50336bd2089SBarry Smith /*
50436bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
50536bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
50636bd2089SBarry Smith */
507d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
508d71ae5a4SJacob Faibussowitsch {
5090880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
510f15d580aSBarry Smith   const MatScalar *value;
511f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
512ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
513899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
514476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
515476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5160880e062SHong Zhang 
517a30f8f8cSSatish Balay   PetscFunctionBegin;
5180880e062SHong Zhang   if (!barray) {
5199566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5200880e062SHong Zhang     baij->barray = barray;
5210880e062SHong Zhang   }
5220880e062SHong Zhang 
5230880e062SHong Zhang   if (roworiented) {
5240880e062SHong Zhang     stepval = (n - 1) * bs;
5250880e062SHong Zhang   } else {
5260880e062SHong Zhang     stepval = (m - 1) * bs;
5270880e062SHong Zhang   }
5280880e062SHong Zhang   for (i = 0; i < m; i++) {
5290880e062SHong Zhang     if (im[i] < 0) continue;
5306bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5310880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5320880e062SHong Zhang       row = im[i] - rstart;
5330880e062SHong Zhang       for (j = 0; j < n; j++) {
534f3f98c53SJed Brown         if (im[i] > in[j]) {
535f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
536e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
537f3f98c53SJed Brown         }
5380880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
5390880e062SHong Zhang         if ((roworiented) && (n == 1)) {
540f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
5410880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
542f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
5430880e062SHong Zhang         } else { /* Here a copy is required */
5440880e062SHong Zhang           if (roworiented) {
5450880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
5460880e062SHong Zhang           } else {
5470880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
5480880e062SHong Zhang           }
5490880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
550ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
5510880e062SHong Zhang           }
5520880e062SHong Zhang           barray -= bs2;
5530880e062SHong Zhang         }
5540880e062SHong Zhang 
5550880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
5560880e062SHong Zhang           col = in[j] - cstart;
5579566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
558f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
559f7d195e4SLawrence Mitchell           continue;
560f7d195e4SLawrence Mitchell         } else {
561f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
5620880e062SHong Zhang           if (mat->was_assembled) {
56348a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
5640880e062SHong Zhang 
5652515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
5660880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
5679371c9d4SSatish Balay             {
5689371c9d4SSatish Balay               PetscInt data;
5699566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
57008401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5710880e062SHong Zhang             }
5720880e062SHong Zhang   #else
57308401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5740880e062SHong Zhang   #endif
5750880e062SHong Zhang #endif
5760880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
5779566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
5780880e062SHong Zhang             col = (col - 1) / bs;
5790880e062SHong Zhang #else
5800880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
5810880e062SHong Zhang #endif
5820880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
5839566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
5840880e062SHong Zhang               col = in[j];
5850880e062SHong Zhang             }
58626fbe8dcSKarl Rupp           } else col = in[j];
5879566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
5880880e062SHong Zhang         }
5890880e062SHong Zhang       }
5900880e062SHong Zhang     } else {
5915f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
5920880e062SHong Zhang       if (!baij->donotstash) {
5930880e062SHong Zhang         if (roworiented) {
5949566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5950880e062SHong Zhang         } else {
5969566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5970880e062SHong Zhang         }
5980880e062SHong Zhang       }
5990880e062SHong Zhang     }
6000880e062SHong Zhang   }
6010880e062SHong Zhang   PetscFunctionReturn(0);
602a30f8f8cSSatish Balay }
603a30f8f8cSSatish Balay 
604d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
605d71ae5a4SJacob Faibussowitsch {
606f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
607d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
608d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
609a30f8f8cSSatish Balay 
610a30f8f8cSSatish Balay   PetscFunctionBegin;
611a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
61254c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
61354c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
614a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
615a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
616a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
61754c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
61854c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
619a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
620a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6219566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
622a30f8f8cSSatish Balay         } else {
62348a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
6259566063dSJacob Faibussowitsch           PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
626a30f8f8cSSatish Balay           data--;
627a30f8f8cSSatish Balay #else
628a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
629a30f8f8cSSatish Balay #endif
630a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
631a30f8f8cSSatish Balay           else {
632a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6339566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
634a30f8f8cSSatish Balay           }
635a30f8f8cSSatish Balay         }
636a30f8f8cSSatish Balay       }
637f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
638a30f8f8cSSatish Balay   }
639a30f8f8cSSatish Balay   PetscFunctionReturn(0);
640a30f8f8cSSatish Balay }
641a30f8f8cSSatish Balay 
642d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
643d71ae5a4SJacob Faibussowitsch {
644a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
645a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
646a30f8f8cSSatish Balay 
647a30f8f8cSSatish Balay   PetscFunctionBegin;
648a30f8f8cSSatish Balay   if (baij->size == 1) {
6499566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
650a30f8f8cSSatish Balay   } else {
651a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
6529566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
6539566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
6549371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6559371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
6569566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
6579371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6589371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
6591c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
6608f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
6619566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
6620b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
6630b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
6640b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
6650b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
666899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
667d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
6680b8dc8d2SHong Zhang       MatScalar    *v;
6690b8dc8d2SHong Zhang 
6709566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
6719566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
6720b8dc8d2SHong Zhang       /* Amat */
6739371c9d4SSatish Balay       v  = amat->a;
6749371c9d4SSatish Balay       jj = amat->j;
6750b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6760b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6770b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
6780b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
6799371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
6809371c9d4SSatish Balay           jj++;
6810b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
6820b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
6839371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
6849371c9d4SSatish Balay               v++;
6850b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
6860b8dc8d2SHong Zhang               /* non-diagonal block */
6870b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
6880b8dc8d2SHong Zhang             }
6890b8dc8d2SHong Zhang           }
6900b8dc8d2SHong Zhang         }
6919566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
6920b8dc8d2SHong Zhang       }
6930b8dc8d2SHong Zhang       /* Bmat */
6949371c9d4SSatish Balay       v  = bmat->a;
6959371c9d4SSatish Balay       jj = bmat->j;
6960b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6970b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6980b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
6990b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7009371c9d4SSatish Balay           gcol = bs * garray[*jj];
7019371c9d4SSatish Balay           jj++;
7020b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7030b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7049371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7059371c9d4SSatish Balay               v++;
7060b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7070b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7080b8dc8d2SHong Zhang             }
7090b8dc8d2SHong Zhang           }
7100b8dc8d2SHong Zhang         }
7119566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7120b8dc8d2SHong Zhang       }
7131c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7140b8dc8d2SHong Zhang       *norm = 0.0;
715d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7160b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7170b8dc8d2SHong Zhang       }
7189566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
719f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
720a30f8f8cSSatish Balay   }
721a30f8f8cSSatish Balay   PetscFunctionReturn(0);
722a30f8f8cSSatish Balay }
723a30f8f8cSSatish Balay 
724d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
725d71ae5a4SJacob Faibussowitsch {
726a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7271302d50aSBarry Smith   PetscInt      nstash, reallocs;
728a30f8f8cSSatish Balay 
729a30f8f8cSSatish Balay   PetscFunctionBegin;
73026fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
731a30f8f8cSSatish Balay 
7329566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7339566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7349566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7359566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7369566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7379566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
738a30f8f8cSSatish Balay   PetscFunctionReturn(0);
739a30f8f8cSSatish Balay }
740a30f8f8cSSatish Balay 
741d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
742d71ae5a4SJacob Faibussowitsch {
743a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
744a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
74513f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
746e44c0bd4SBarry Smith   PetscInt     *row, *col;
747ace3abfcSBarry Smith   PetscBool     other_disassembled;
74813f74950SBarry Smith   PetscMPIInt   n;
749ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
750a30f8f8cSSatish Balay   MatScalar    *val;
751a30f8f8cSSatish Balay 
75291c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
753a30f8f8cSSatish Balay   PetscFunctionBegin;
7544cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
755a30f8f8cSSatish Balay     while (1) {
7569566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
757a30f8f8cSSatish Balay       if (!flg) break;
758a30f8f8cSSatish Balay 
759a30f8f8cSSatish Balay       for (i = 0; i < n;) {
760a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
76126fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
76226fbe8dcSKarl Rupp           if (row[j] != rstart) break;
76326fbe8dcSKarl Rupp         }
764a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
765a30f8f8cSSatish Balay         else ncols = n - i;
766a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
7679566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
768a30f8f8cSSatish Balay         i = j;
769a30f8f8cSSatish Balay       }
770a30f8f8cSSatish Balay     }
7719566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
772a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
773a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
774a30f8f8cSSatish Balay        restore the original flags */
775a30f8f8cSSatish Balay     r1 = baij->roworiented;
776a30f8f8cSSatish Balay     r2 = a->roworiented;
77791c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
77826fbe8dcSKarl Rupp 
779a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
780a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
78126fbe8dcSKarl Rupp 
78291c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
783a30f8f8cSSatish Balay     while (1) {
7849566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
785a30f8f8cSSatish Balay       if (!flg) break;
786a30f8f8cSSatish Balay 
787a30f8f8cSSatish Balay       for (i = 0; i < n;) {
788a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
78926fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
79026fbe8dcSKarl Rupp           if (row[j] != rstart) break;
79126fbe8dcSKarl Rupp         }
792a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
793a30f8f8cSSatish Balay         else ncols = n - i;
7949566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
795a30f8f8cSSatish Balay         i = j;
796a30f8f8cSSatish Balay       }
797a30f8f8cSSatish Balay     }
7989566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
79926fbe8dcSKarl Rupp 
800a30f8f8cSSatish Balay     baij->roworiented = r1;
801a30f8f8cSSatish Balay     a->roworiented    = r2;
80226fbe8dcSKarl Rupp 
80391c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
804a30f8f8cSSatish Balay   }
805a30f8f8cSSatish Balay 
8069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
808a30f8f8cSSatish Balay 
809a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8106aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
811a30f8f8cSSatish Balay   /*
812a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
813a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
814a30f8f8cSSatish Balay   */
815a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8165f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
81748a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
818a30f8f8cSSatish Balay   }
819a30f8f8cSSatish Balay 
8209371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8229566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
823a30f8f8cSSatish Balay 
8249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
82526fbe8dcSKarl Rupp 
826f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8274f9cfa9eSBarry Smith 
8284f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8294f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
830e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8311c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
832e56f5c9eSBarry Smith   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8379804daf3SBarry Smith #include <petscdraw.h>
838d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
839d71ae5a4SJacob Faibussowitsch {
840a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
841d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
8427da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
843ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
844b0a32e0cSBarry Smith   PetscViewer       sviewer;
845f3ef73ceSBarry Smith   PetscViewerFormat format;
846a30f8f8cSSatish Balay 
847a30f8f8cSSatish Balay   PetscFunctionBegin;
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
8499566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
85032077d6dSBarry Smith   if (iascii) {
8519566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
852456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
853a30f8f8cSSatish Balay       MatInfo info;
8549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
8559566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
8569566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
8579371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
8589371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
8599566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
8609566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8619566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
8629566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8639566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
8649566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
8659566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
8669566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
867a30f8f8cSSatish Balay       PetscFunctionReturn(0);
868fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
8699566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
870a30f8f8cSSatish Balay       PetscFunctionReturn(0);
871c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
872c1490034SHong Zhang       PetscFunctionReturn(0);
873a30f8f8cSSatish Balay     }
874a30f8f8cSSatish Balay   }
875a30f8f8cSSatish Balay 
876a30f8f8cSSatish Balay   if (isdraw) {
877b0a32e0cSBarry Smith     PetscDraw draw;
878ace3abfcSBarry Smith     PetscBool isnull;
8799566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
8809566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
88145f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
882a30f8f8cSSatish Balay   }
883a30f8f8cSSatish Balay 
8847da1fb6eSBarry Smith   {
885a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
886a30f8f8cSSatish Balay     Mat           A;
88765d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
88865d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
889d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
890a30f8f8cSSatish Balay     MatScalar    *a;
8913e219373SBarry Smith     const char   *matname;
892a30f8f8cSSatish Balay 
893f204ca49SKris Buschelman     /* Should this be the same type as mat? */
8949566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
895dd400576SPatrick Sanan     if (rank == 0) {
8969566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
897a30f8f8cSSatish Balay     } else {
8989566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
899a30f8f8cSSatish Balay     }
9009566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9019566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9029566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
903a30f8f8cSSatish Balay 
904a30f8f8cSSatish Balay     /* copy over the A part */
90565d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9069371c9d4SSatish Balay     ai   = Aloc->i;
9079371c9d4SSatish Balay     aj   = Aloc->j;
9089371c9d4SSatish Balay     a    = Aloc->a;
9099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
910a30f8f8cSSatish Balay 
911a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
912e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
91326fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
914a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
915e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
916a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9179566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
91826fbe8dcSKarl Rupp           col++;
91926fbe8dcSKarl Rupp           a += bs;
920a30f8f8cSSatish Balay         }
921a30f8f8cSSatish Balay       }
922a30f8f8cSSatish Balay     }
923a30f8f8cSSatish Balay     /* copy over the B part */
92465d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9259371c9d4SSatish Balay     ai   = Bloc->i;
9269371c9d4SSatish Balay     aj   = Bloc->j;
9279371c9d4SSatish Balay     a    = Bloc->a;
928a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
929e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
93026fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
931a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
932a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
933a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9349566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
93526fbe8dcSKarl Rupp           col++;
93626fbe8dcSKarl Rupp           a += bs;
937a30f8f8cSSatish Balay         }
938a30f8f8cSSatish Balay       }
939a30f8f8cSSatish Balay     }
9409566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
9419566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
9429566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
943a30f8f8cSSatish Balay     /*
944a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
945b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
946a30f8f8cSSatish Balay     */
9479566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
9489566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
949dd400576SPatrick Sanan     if (rank == 0) {
9509566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
9519566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
952a30f8f8cSSatish Balay     }
9539566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
9549566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
9559566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
956a30f8f8cSSatish Balay   }
957a30f8f8cSSatish Balay   PetscFunctionReturn(0);
958a30f8f8cSSatish Balay }
959a30f8f8cSSatish Balay 
960618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
961618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
962d1654148SHong Zhang 
963d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
964d71ae5a4SJacob Faibussowitsch {
965ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
966a30f8f8cSSatish Balay 
967a30f8f8cSSatish Balay   PetscFunctionBegin;
9689566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
9709566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
9719566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
972d1654148SHong Zhang   if (iascii || isdraw || issocket) {
9739566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
9741baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
975a30f8f8cSSatish Balay   PetscFunctionReturn(0);
976a30f8f8cSSatish Balay }
977a30f8f8cSSatish Balay 
978d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
979d71ae5a4SJacob Faibussowitsch {
980a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
981a30f8f8cSSatish Balay 
982a30f8f8cSSatish Balay   PetscFunctionBegin;
983a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
984c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
985a30f8f8cSSatish Balay #endif
9869566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
9879566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->bstash));
9889566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->A));
9899566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->B));
990a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
9919566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&baij->colmap));
992a30f8f8cSSatish Balay #else
9939566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->colmap));
994a30f8f8cSSatish Balay #endif
9959566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
9969566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
9979566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
9989566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
9999566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
10009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
10019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
10029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
10039566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->sMvctx));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->barray));
10069566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->hd));
10079566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->diag));
10089566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->bb1));
10099566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->xx1));
1010ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
10119566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->setvaluescopy));
1012a30f8f8cSSatish Balay #endif
10139566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->in_loc));
10149566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->v_loc));
10159566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->rangebs));
10169566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1017901853e0SKris Buschelman 
10189566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
10199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
10209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
10229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
10236214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
10249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
10256214f412SHong Zhang #endif
1026d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
10279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
1028d24d4204SJose E. Roman #endif
10299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
1031a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1032a30f8f8cSSatish Balay }
1033a30f8f8cSSatish Balay 
1034d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1035d71ae5a4SJacob Faibussowitsch {
1036547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1037eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10386de40e93SBarry Smith   PetscScalar       *from;
10396de40e93SBarry Smith   const PetscScalar *x;
1040547795f9SHong Zhang 
1041547795f9SHong Zhang   PetscFunctionBegin;
1042547795f9SHong Zhang   /* diagonal part */
10439566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10449566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1045547795f9SHong Zhang 
1046547795f9SHong Zhang   /* subdiagonal part */
10475f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10489566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1049547795f9SHong Zhang 
1050547795f9SHong Zhang   /* copy x into the vec slvec0 */
10519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1053547795f9SHong Zhang 
10549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1057547795f9SHong Zhang 
10589566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10599566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1060547795f9SHong Zhang   /* supperdiagonal part */
10619566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1062547795f9SHong Zhang   PetscFunctionReturn(0);
1063547795f9SHong Zhang }
1064547795f9SHong Zhang 
1065d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1066d71ae5a4SJacob Faibussowitsch {
1067a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1068eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1069d9ca1df4SBarry Smith   PetscScalar       *from;
1070d9ca1df4SBarry Smith   const PetscScalar *x;
1071a9d4b620SHong Zhang 
1072a9d4b620SHong Zhang   PetscFunctionBegin;
1073a9d4b620SHong Zhang   /* diagonal part */
10749566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10759566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1076a9d4b620SHong Zhang 
1077a9d4b620SHong Zhang   /* subdiagonal part */
10789566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1079fc165ae2SBarry Smith 
1080a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1083a9d4b620SHong Zhang 
10849566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1087fc165ae2SBarry Smith 
10889566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10899566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1090a9d4b620SHong Zhang   /* supperdiagonal part */
10919566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1092a9d4b620SHong Zhang   PetscFunctionReturn(0);
1093a9d4b620SHong Zhang }
1094a9d4b620SHong Zhang 
1095d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1096d71ae5a4SJacob Faibussowitsch {
1097eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1098eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1099eb1ec7c1SStefano Zampini   PetscScalar       *from, zero       = 0.0;
1100eb1ec7c1SStefano Zampini   const PetscScalar *x;
1101eb1ec7c1SStefano Zampini 
1102eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1103eb1ec7c1SStefano Zampini   /* diagonal part */
11049566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11059566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1106eb1ec7c1SStefano Zampini 
1107eb1ec7c1SStefano Zampini   /* subdiagonal part */
11085f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11099566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1110eb1ec7c1SStefano Zampini 
1111eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11149566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1116eb1ec7c1SStefano Zampini 
11179566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11199566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1120eb1ec7c1SStefano Zampini 
1121eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11229566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1123eb1ec7c1SStefano Zampini   PetscFunctionReturn(0);
1124eb1ec7c1SStefano Zampini }
1125eb1ec7c1SStefano Zampini 
1126d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1127d71ae5a4SJacob Faibussowitsch {
1128de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1129d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1130d9ca1df4SBarry Smith   PetscScalar       *from, zero       = 0.0;
1131d9ca1df4SBarry Smith   const PetscScalar *x;
1132a9d4b620SHong Zhang 
1133a9d4b620SHong Zhang   PetscFunctionBegin;
1134a9d4b620SHong Zhang   /* diagonal part */
11359566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11369566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1137a9d4b620SHong Zhang 
1138a9d4b620SHong Zhang   /* subdiagonal part */
11399566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1140a9d4b620SHong Zhang 
1141a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11449566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1146a9d4b620SHong Zhang 
11479566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11499566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1150a9d4b620SHong Zhang 
1151a9d4b620SHong Zhang   /* supperdiagonal part */
11529566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1153a9d4b620SHong Zhang   PetscFunctionReturn(0);
1154a9d4b620SHong Zhang }
1155a9d4b620SHong Zhang 
1156a30f8f8cSSatish Balay /*
1157a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1158a30f8f8cSSatish Balay    diagonal block
1159a30f8f8cSSatish Balay */
1160d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1161d71ae5a4SJacob Faibussowitsch {
1162a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1163a30f8f8cSSatish Balay 
1164a30f8f8cSSatish Balay   PetscFunctionBegin;
116508401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11669566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
1167a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1168a30f8f8cSSatish Balay }
1169a30f8f8cSSatish Balay 
1170d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1171d71ae5a4SJacob Faibussowitsch {
1172a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1173a30f8f8cSSatish Balay 
1174a30f8f8cSSatish Balay   PetscFunctionBegin;
11759566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11769566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
1177a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1178a30f8f8cSSatish Balay }
1179a30f8f8cSSatish Balay 
1180d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1181d71ae5a4SJacob Faibussowitsch {
1182d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1183d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1184d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1185d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1186899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1187d0d4cfc2SHong Zhang 
1188a30f8f8cSSatish Balay   PetscFunctionBegin;
11895f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1190d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1191d0d4cfc2SHong Zhang 
1192d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1193d0d4cfc2SHong Zhang     /*
1194d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1195d0d4cfc2SHong Zhang     */
1196d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1197d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1198d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1199d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1200d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
120126fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1202d0d4cfc2SHong Zhang     }
12039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1204d0d4cfc2SHong Zhang   }
1205d0d4cfc2SHong Zhang 
12065f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1207d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1208d0d4cfc2SHong Zhang 
12099371c9d4SSatish Balay   pvA = &vworkA;
12109371c9d4SSatish Balay   pcA = &cworkA;
12119371c9d4SSatish Balay   pvB = &vworkB;
12129371c9d4SSatish Balay   pcB = &cworkB;
12139371c9d4SSatish Balay   if (!v) {
12149371c9d4SSatish Balay     pvA = NULL;
12159371c9d4SSatish Balay     pvB = NULL;
12169371c9d4SSatish Balay   }
12179371c9d4SSatish Balay   if (!idx) {
12189371c9d4SSatish Balay     pcA = NULL;
12199371c9d4SSatish Balay     if (!v) pcB = NULL;
12209371c9d4SSatish Balay   }
12219566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12229566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1223d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1224d0d4cfc2SHong Zhang 
1225d0d4cfc2SHong Zhang   cmap = mat->garray;
1226d0d4cfc2SHong Zhang   if (v || idx) {
1227d0d4cfc2SHong Zhang     if (nztot) {
1228d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1229d0d4cfc2SHong Zhang       PetscInt imark = -1;
1230d0d4cfc2SHong Zhang       if (v) {
1231d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1232d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1233d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1234d0d4cfc2SHong Zhang           else break;
1235d0d4cfc2SHong Zhang         }
1236d0d4cfc2SHong Zhang         imark = i;
1237d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1238d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1239d0d4cfc2SHong Zhang       }
1240d0d4cfc2SHong Zhang       if (idx) {
1241d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1242d0d4cfc2SHong Zhang         if (imark > -1) {
1243ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1244d0d4cfc2SHong Zhang         } else {
1245d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
124626fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1247d0d4cfc2SHong Zhang             else break;
1248d0d4cfc2SHong Zhang           }
1249d0d4cfc2SHong Zhang           imark = i;
1250d0d4cfc2SHong Zhang         }
1251d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1252d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253d0d4cfc2SHong Zhang       }
1254d0d4cfc2SHong Zhang     } else {
1255f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1256f4259b30SLisandro Dalcin       if (v) *v = NULL;
1257d0d4cfc2SHong Zhang     }
1258d0d4cfc2SHong Zhang   }
1259d0d4cfc2SHong Zhang   *nz = nztot;
12609566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12619566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1262a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1263a30f8f8cSSatish Balay }
1264a30f8f8cSSatish Balay 
1265d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1266d71ae5a4SJacob Faibussowitsch {
1267a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1268a30f8f8cSSatish Balay 
1269a30f8f8cSSatish Balay   PetscFunctionBegin;
12705f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1271a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1272a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1273a30f8f8cSSatish Balay }
1274a30f8f8cSSatish Balay 
1275d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1276d71ae5a4SJacob Faibussowitsch {
1277d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279d0d4cfc2SHong Zhang 
1280d0d4cfc2SHong Zhang   PetscFunctionBegin;
1281d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
1282d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1283d0d4cfc2SHong Zhang }
1284d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285d71ae5a4SJacob Faibussowitsch {
1286d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1287d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1288d0d4cfc2SHong Zhang 
1289d0d4cfc2SHong Zhang   PetscFunctionBegin;
1290d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
1291d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1292d0d4cfc2SHong Zhang }
1293d0d4cfc2SHong Zhang 
1294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1295d71ae5a4SJacob Faibussowitsch {
12965f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
12975f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
12982726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
12992726fb6dSPierre Jolivet 
13009566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13019566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13025f80ce2aSJacob Faibussowitsch   }
13032726fb6dSPierre Jolivet   PetscFunctionReturn(0);
13042726fb6dSPierre Jolivet }
13052726fb6dSPierre Jolivet 
1306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1307d71ae5a4SJacob Faibussowitsch {
130899cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
130999cafbc1SBarry Smith 
131099cafbc1SBarry Smith   PetscFunctionBegin;
13119566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13129566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
131399cafbc1SBarry Smith   PetscFunctionReturn(0);
131499cafbc1SBarry Smith }
131599cafbc1SBarry Smith 
1316d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1317d71ae5a4SJacob Faibussowitsch {
131899cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
131999cafbc1SBarry Smith 
132099cafbc1SBarry Smith   PetscFunctionBegin;
13219566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13229566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
132399cafbc1SBarry Smith   PetscFunctionReturn(0);
132499cafbc1SBarry Smith }
132599cafbc1SBarry Smith 
13267dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
132736032a97SHong Zhang    Input: isrow       - distributed(parallel),
132836032a97SHong Zhang           iscol_local - locally owned (seq)
132936032a97SHong Zhang */
1330d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1331d71ae5a4SJacob Faibussowitsch {
13328f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13338f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
133436032a97SHong Zhang 
133536032a97SHong Zhang   PetscFunctionBegin;
13369566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13379566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13381098a8e8SHong Zhang   if (sz1 > sz2) {
13391098a8e8SHong Zhang     *flg = PETSC_FALSE;
13401098a8e8SHong Zhang     PetscFunctionReturn(0);
13411098a8e8SHong Zhang   }
13428f46ffcaSHong Zhang 
13439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13449566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13458f46ffcaSHong Zhang 
13469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13489566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13499566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13509566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13519566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13528f46ffcaSHong Zhang 
13538f46ffcaSHong Zhang   nmatch = 0;
13548f46ffcaSHong Zhang   k      = 0;
13558f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13568f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13578f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13589371c9d4SSatish Balay         k = j;
13599371c9d4SSatish Balay         nmatch++;
13608f46ffcaSHong Zhang         break;
13618f46ffcaSHong Zhang       }
13628f46ffcaSHong Zhang     }
13638f46ffcaSHong Zhang   }
13649566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13659566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13669566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13679566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13681098a8e8SHong Zhang   if (nmatch < sz1) {
13691098a8e8SHong Zhang     *flg = PETSC_FALSE;
13701098a8e8SHong Zhang   } else {
13711098a8e8SHong Zhang     *flg = PETSC_TRUE;
13721098a8e8SHong Zhang   }
137336032a97SHong Zhang   PetscFunctionReturn(0);
13748f46ffcaSHong Zhang }
137536032a97SHong Zhang 
1376d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1377d71ae5a4SJacob Faibussowitsch {
137836032a97SHong Zhang   IS        iscol_local;
137936032a97SHong Zhang   PetscInt  csize;
138036032a97SHong Zhang   PetscBool isequal;
138136032a97SHong Zhang 
138236032a97SHong Zhang   PetscFunctionBegin;
13839566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
138436032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13859566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13865f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
138736032a97SHong Zhang   } else {
1388068661f9SToby Isaac     PetscBool issorted;
1389068661f9SToby Isaac 
13909566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
13919566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
13929566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
13935f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
13948f46ffcaSHong Zhang   }
13958f46ffcaSHong Zhang 
13967dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
13979566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
13988f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
13999566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
14009566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14018f46ffcaSHong Zhang   }
14028f46ffcaSHong Zhang   PetscFunctionReturn(0);
14038f46ffcaSHong Zhang }
14048f46ffcaSHong Zhang 
1405d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1406d71ae5a4SJacob Faibussowitsch {
1407a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1408a30f8f8cSSatish Balay 
1409a30f8f8cSSatish Balay   PetscFunctionBegin;
14109566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14119566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
1412a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1413a30f8f8cSSatish Balay }
1414a30f8f8cSSatish Balay 
1415d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1416d71ae5a4SJacob Faibussowitsch {
1417a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1418a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14193966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1420a30f8f8cSSatish Balay 
1421a30f8f8cSSatish Balay   PetscFunctionBegin;
1422d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
142326fbe8dcSKarl Rupp 
14249566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
142526fbe8dcSKarl Rupp 
14269371c9d4SSatish Balay   isend[0] = info->nz_used;
14279371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14289371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14299371c9d4SSatish Balay   isend[3] = info->memory;
14309371c9d4SSatish Balay   isend[4] = info->mallocs;
143126fbe8dcSKarl Rupp 
14329566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
143326fbe8dcSKarl Rupp 
14349371c9d4SSatish Balay   isend[0] += info->nz_used;
14359371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14369371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14379371c9d4SSatish Balay   isend[3] += info->memory;
14389371c9d4SSatish Balay   isend[4] += info->mallocs;
1439a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1440a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1441a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1442a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1443a30f8f8cSSatish Balay     info->memory       = isend[3];
1444a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1445a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14461c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
144726fbe8dcSKarl Rupp 
1448a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1449a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1450a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1451a30f8f8cSSatish Balay     info->memory       = irecv[3];
1452a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1453a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14541c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
145526fbe8dcSKarl Rupp 
1456a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1457a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1458a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1459a30f8f8cSSatish Balay     info->memory       = irecv[3];
1460a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
146198921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1462a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1464a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1465a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1466a30f8f8cSSatish Balay }
1467a30f8f8cSSatish Balay 
1468d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1469d71ae5a4SJacob Faibussowitsch {
1470a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1471d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1472a30f8f8cSSatish Balay 
1473a30f8f8cSSatish Balay   PetscFunctionBegin;
1474e98b92d7SKris Buschelman   switch (op) {
1475512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1476e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
147728b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1479c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1480e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
148143674050SBarry Smith     MatCheckPreallocated(A, 1);
14829566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14839566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1484e98b92d7SKris Buschelman     break;
1485e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
148643674050SBarry Smith     MatCheckPreallocated(A, 1);
14874e0d8c25SBarry Smith     a->roworiented = flg;
148826fbe8dcSKarl Rupp 
14899566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14909566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1491e98b92d7SKris Buschelman     break;
14928c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1493d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1494d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1495d71ae5a4SJacob Faibussowitsch     break;
1496d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1498d71ae5a4SJacob Faibussowitsch     break;
1499d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1500d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1501d71ae5a4SJacob Faibussowitsch     break;
1502d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1503d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1504d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15050f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1506eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1507547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1510eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1511b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1512eb1ec7c1SStefano Zampini     }
15130f2140c7SStefano Zampini #endif
1514eeffb40dSHong Zhang     break;
1515ffa07934SHong Zhang   case MAT_SPD:
1516d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1517d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1518d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1519eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1520eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1521eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1522eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525eb1ec7c1SStefano Zampini     }
1526eb1ec7c1SStefano Zampini #endif
1527eeffb40dSHong Zhang     break;
152877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
152943674050SBarry Smith     MatCheckPreallocated(A, 1);
15309566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1531eeffb40dSHong Zhang     break;
15329a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1533b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15345f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
153677e54ba9SKris Buschelman     break;
1537d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1538d71ae5a4SJacob Faibussowitsch     break;
1539d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1540d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1541d71ae5a4SJacob Faibussowitsch     break;
1542d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1543d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1544d71ae5a4SJacob Faibussowitsch     break;
1545d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1546d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1547d71ae5a4SJacob Faibussowitsch     break;
1548d71ae5a4SJacob Faibussowitsch   default:
1549d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1550a30f8f8cSSatish Balay   }
1551a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1552a30f8f8cSSatish Balay }
1553a30f8f8cSSatish Balay 
1554d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1555d71ae5a4SJacob Faibussowitsch {
1556a30f8f8cSSatish Balay   PetscFunctionBegin;
15577fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1558cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15599566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1560cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15619566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1562fc4dec0aSBarry Smith   }
15638115998fSBarry Smith   PetscFunctionReturn(0);
1564a30f8f8cSSatish Balay }
1565a30f8f8cSSatish Balay 
1566d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1567d71ae5a4SJacob Faibussowitsch {
1568a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1569a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15705e90f9d9SHong Zhang   PetscInt      nv, m, n;
1571ace3abfcSBarry Smith   PetscBool     flg;
1572a30f8f8cSSatish Balay 
1573a30f8f8cSSatish Balay   PetscFunctionBegin;
1574a30f8f8cSSatish Balay   if (ll != rr) {
15759566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15765f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1577a30f8f8cSSatish Balay   }
1578b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
1579b3bf805bSHong Zhang 
15809566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15815f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1582b3bf805bSHong Zhang 
15839566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15845f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15855e90f9d9SHong Zhang 
15869566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15875e90f9d9SHong Zhang 
15885e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1589dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
15905e90f9d9SHong Zhang 
15915e90f9d9SHong Zhang   /* scale the diagonal part */
1592dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1593a30f8f8cSSatish Balay 
15945e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
15959566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1597a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1598a30f8f8cSSatish Balay }
1599a30f8f8cSSatish Balay 
1600d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601d71ae5a4SJacob Faibussowitsch {
1602f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1603a30f8f8cSSatish Balay 
1604a30f8f8cSSatish Balay   PetscFunctionBegin;
16059566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
1606a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1607a30f8f8cSSatish Balay }
1608a30f8f8cSSatish Balay 
16096849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1610a30f8f8cSSatish Balay 
1611d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1612d71ae5a4SJacob Faibussowitsch {
1613a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1614a30f8f8cSSatish Balay   Mat           a, b, c, d;
1615ace3abfcSBarry Smith   PetscBool     flg;
1616a30f8f8cSSatish Balay 
1617a30f8f8cSSatish Balay   PetscFunctionBegin;
16189371c9d4SSatish Balay   a = matA->A;
16199371c9d4SSatish Balay   b = matA->B;
16209371c9d4SSatish Balay   c = matB->A;
16219371c9d4SSatish Balay   d = matB->B;
1622a30f8f8cSSatish Balay 
16239566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
162448a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16251c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1626a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1627a30f8f8cSSatish Balay }
1628a30f8f8cSSatish Balay 
1629d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1630d71ae5a4SJacob Faibussowitsch {
16314c7a3774SStefano Zampini   PetscBool isbaij;
16323c896bc6SHong Zhang 
16333c896bc6SHong Zhang   PetscFunctionBegin;
16349566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16355f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16363c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16373c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16389566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16399566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16409566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16413c896bc6SHong Zhang   } else {
16424c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16434c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16444c7a3774SStefano Zampini 
16459566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16469566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16473c896bc6SHong Zhang   }
16489566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16493c896bc6SHong Zhang   PetscFunctionReturn(0);
16503c896bc6SHong Zhang }
16513c896bc6SHong Zhang 
1652d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1653d71ae5a4SJacob Faibussowitsch {
1654273d9f13SBarry Smith   PetscFunctionBegin;
16559566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1656273d9f13SBarry Smith   PetscFunctionReturn(0);
1657273d9f13SBarry Smith }
1658a5e6ed63SBarry Smith 
1659d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1660d71ae5a4SJacob Faibussowitsch {
16614fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16624fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16634fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16644fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16654fe895cdSHong Zhang 
16664fe895cdSHong Zhang   PetscFunctionBegin;
16674fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16684fe895cdSHong Zhang     PetscScalar alpha = a;
16694fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16704fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16719566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1672792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16734fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16744fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16759566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1676792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16779566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1678ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16799566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16809566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16819566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16824fe895cdSHong Zhang   } else {
16834de5dceeSHong Zhang     Mat       B;
16844de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16855f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16869566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16879566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16909566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16919566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16929566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16939566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16949566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
16959566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
16969566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
16979566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
16989566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
16999566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
17009566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17019566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17029566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17039566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17044fe895cdSHong Zhang   }
17054fe895cdSHong Zhang   PetscFunctionReturn(0);
17064fe895cdSHong Zhang }
17074fe895cdSHong Zhang 
1708d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1709d71ae5a4SJacob Faibussowitsch {
17101302d50aSBarry Smith   PetscInt  i;
1711afebec48SHong Zhang   PetscBool flg;
1712a5e6ed63SBarry Smith 
17136849ba73SBarry Smith   PetscFunctionBegin;
17149566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1715a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17169566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
171748a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17184dcd73b1SHong Zhang   }
1719a5e6ed63SBarry Smith   PetscFunctionReturn(0);
1720a5e6ed63SBarry Smith }
1721a5e6ed63SBarry Smith 
1722d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1723d71ae5a4SJacob Faibussowitsch {
17247d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17256f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17267d68702bSBarry Smith 
17277d68702bSBarry Smith   PetscFunctionBegin;
17286f33a894SBarry Smith   if (!Y->preallocated) {
17299566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17306f33a894SBarry Smith   } else if (!aij->nz) {
1731b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17329566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1733b83222d8SBarry Smith     aij->nonew = nonew;
17347d68702bSBarry Smith   }
17359566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17367d68702bSBarry Smith   PetscFunctionReturn(0);
17377d68702bSBarry Smith }
17387d68702bSBarry Smith 
1739d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1740d71ae5a4SJacob Faibussowitsch {
17413b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17423b49f96aSBarry Smith 
17433b49f96aSBarry Smith   PetscFunctionBegin;
17445f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17459566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17463b49f96aSBarry Smith   if (d) {
17473b49f96aSBarry Smith     PetscInt rstart;
17489566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17493b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17503b49f96aSBarry Smith   }
17513b49f96aSBarry Smith   PetscFunctionReturn(0);
17523b49f96aSBarry Smith }
17533b49f96aSBarry Smith 
1754d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1755d71ae5a4SJacob Faibussowitsch {
1756a5b7ff6bSBarry Smith   PetscFunctionBegin;
1757a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
1758a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
1759a5b7ff6bSBarry Smith }
17603b49f96aSBarry Smith 
1761a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
17623964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1763a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1764a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1765a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
176697304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1767431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1768431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1769f4259b30SLisandro Dalcin                                        NULL,
1770f4259b30SLisandro Dalcin                                        NULL,
1771f4259b30SLisandro Dalcin                                        NULL,
1772f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1773f4259b30SLisandro Dalcin                                        NULL,
1774f4259b30SLisandro Dalcin                                        NULL,
177541f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1776a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
177797304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1778a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1779a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1780a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1781a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
178297304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1783a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1784a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1785a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1786f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1787f4259b30SLisandro Dalcin                                        NULL,
1788f4259b30SLisandro Dalcin                                        NULL,
1789f4259b30SLisandro Dalcin                                        NULL,
1790f4259b30SLisandro Dalcin                                        NULL,
17914994cf47SJed Brown                                        /* 29*/ MatSetUp_MPISBAIJ,
1792f4259b30SLisandro Dalcin                                        NULL,
1793f4259b30SLisandro Dalcin                                        NULL,
1794a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1795f4259b30SLisandro Dalcin                                        NULL,
1796d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1797f4259b30SLisandro Dalcin                                        NULL,
1798f4259b30SLisandro Dalcin                                        NULL,
1799f4259b30SLisandro Dalcin                                        NULL,
1800f4259b30SLisandro Dalcin                                        NULL,
1801d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18027dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1803d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1804a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18053c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1806f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1807a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18087d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1809f4259b30SLisandro Dalcin                                        NULL,
1810f4259b30SLisandro Dalcin                                        NULL,
1811f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1812f4259b30SLisandro Dalcin                                        NULL,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        NULL,
1815f4259b30SLisandro Dalcin                                        NULL,
1816f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1817f4259b30SLisandro Dalcin                                        NULL,
1818a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1819f4259b30SLisandro Dalcin                                        NULL,
1820a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18217dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1822f4259b30SLisandro Dalcin                                        NULL,
1823f4259b30SLisandro Dalcin                                        NULL,
1824f4259b30SLisandro Dalcin                                        NULL,
1825f4259b30SLisandro Dalcin                                        NULL,
1826f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829f4259b30SLisandro Dalcin                                        NULL,
1830f4259b30SLisandro Dalcin                                        NULL,
1831d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1832f4259b30SLisandro Dalcin                                        NULL,
183328d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1834f4259b30SLisandro Dalcin                                        NULL,
1835f4259b30SLisandro Dalcin                                        NULL,
1836f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1837f4259b30SLisandro Dalcin                                        NULL,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        NULL,
1840f4259b30SLisandro Dalcin                                        NULL,
1841f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
1843f4259b30SLisandro Dalcin                                        NULL,
1844f4259b30SLisandro Dalcin                                        NULL,
18455bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1846f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1847f4259b30SLisandro Dalcin                                        NULL,
1848f4259b30SLisandro Dalcin                                        NULL,
1849f4259b30SLisandro Dalcin                                        NULL,
1850f4259b30SLisandro Dalcin                                        NULL,
1851f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        NULL,
1855f4259b30SLisandro Dalcin                                        NULL,
1856f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
1859f4259b30SLisandro Dalcin                                        NULL,
1860f4259b30SLisandro Dalcin                                        NULL,
1861f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1862f4259b30SLisandro Dalcin                                        NULL,
1863f4259b30SLisandro Dalcin                                        NULL,
18642726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1865f4259b30SLisandro Dalcin                                        NULL,
1866f4259b30SLisandro Dalcin                                        /*104*/ NULL,
186799cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1868d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1869d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
187095936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1871f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1872f4259b30SLisandro Dalcin                                        NULL,
1873f4259b30SLisandro Dalcin                                        NULL,
1874f4259b30SLisandro Dalcin                                        NULL,
18753b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1876f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
1878f4259b30SLisandro Dalcin                                        NULL,
1879f4259b30SLisandro Dalcin                                        NULL,
1880f4259b30SLisandro Dalcin                                        NULL,
1881f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1882f4259b30SLisandro Dalcin                                        NULL,
1883f4259b30SLisandro Dalcin                                        NULL,
1884f4259b30SLisandro Dalcin                                        NULL,
1885f4259b30SLisandro Dalcin                                        NULL,
1886f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
1889f4259b30SLisandro Dalcin                                        NULL,
1890f4259b30SLisandro Dalcin                                        NULL,
1891f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        NULL,
1895f4259b30SLisandro Dalcin                                        NULL,
1896f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
1899f4259b30SLisandro Dalcin                                        NULL,
1900f4259b30SLisandro Dalcin                                        NULL,
190146533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904f4259b30SLisandro Dalcin                                        NULL,
1905f4259b30SLisandro Dalcin                                        NULL,
1906d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1907d70f29a3SPierre Jolivet                                        NULL,
1908d70f29a3SPierre Jolivet                                        NULL,
190999a7f59eSMark Adams                                        NULL,
191099a7f59eSMark Adams                                        NULL,
19117fb60732SBarry Smith                                        NULL,
1912*dec0b466SHong Zhang                                        /*150*/ NULL,
1913*dec0b466SHong Zhang                                        NULL};
1914a30f8f8cSSatish Balay 
1915d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1916d71ae5a4SJacob Faibussowitsch {
1917476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1918535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19195d2a9ed1SStefano Zampini   PetscMPIInt   size;
1920a23d5eceSKris Buschelman 
1921a23d5eceSKris Buschelman   PetscFunctionBegin;
19229566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19259566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19265f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19275f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1928899cda47SBarry Smith 
1929d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1930d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19315f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1932a23d5eceSKris Buschelman 
1933d0f46423SBarry Smith   B->rmap->bs = bs;
1934a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1935a23d5eceSKris Buschelman   b->mbs      = mbs;
1936a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1937de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1938de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1939a23d5eceSKris Buschelman 
1940ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1941d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1942d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1943a23d5eceSKris Buschelman 
1944d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1945d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1946a23d5eceSKris Buschelman 
1947cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
19489566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
1949cb7b82ddSBarry Smith #else
19509566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1951cb7b82ddSBarry Smith #endif
19529566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19539566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19549566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19559566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19569566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19579566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19589566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19599566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19609566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1961cb7b82ddSBarry Smith 
1962cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
19639566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19649566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19659566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19669566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19679566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1968cb7b82ddSBarry Smith 
1969526dfc15SBarry Smith   if (!B->preallocated) {
19709566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19719566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19729566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
19739566063dSJacob Faibussowitsch     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1974526dfc15SBarry Smith   }
1975a23d5eceSKris Buschelman 
19769566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19779566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
197826fbe8dcSKarl Rupp 
1979526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1980cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1981cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
1982a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1983a23d5eceSKris Buschelman }
1984a23d5eceSKris Buschelman 
1985d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1986d71ae5a4SJacob Faibussowitsch {
198702106b30SBarry Smith   PetscInt        m, rstart, cend;
1988f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1989f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
1990f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
1991bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
19923bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
1993dfb205c3SBarry Smith 
1994dfb205c3SBarry Smith   PetscFunctionBegin;
19955f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
19969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
19979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
19989566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
20009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2001dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2002dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2003dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2004dfb205c3SBarry Smith 
20055f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2007dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2008dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20095f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20100cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2011dfb205c3SBarry Smith     JJ = jj + ii[i];
20120cd7f59aSBarry Smith     bd = 0;
2013dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20140cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2015dfb205c3SBarry Smith       JJ++;
20160cd7f59aSBarry Smith       bd++;
2017dfb205c3SBarry Smith     }
2018dfb205c3SBarry Smith     d = 0;
2019dfb205c3SBarry Smith     for (; j < nz; j++) {
2020dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2021dfb205c3SBarry Smith       d++;
2022dfb205c3SBarry Smith     }
2023dfb205c3SBarry Smith     d_nnz[i] = d;
20240cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20250cd7f59aSBarry Smith     nz       = nz - bd;
20260cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2027dfb205c3SBarry Smith   }
20289566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20299566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20309566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2031dfb205c3SBarry Smith 
2032dfb205c3SBarry Smith   values = (PetscScalar *)V;
203348a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2034dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2035dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2036dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2037dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2038bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2039dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20409566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2041bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2042bb80cfbbSStefano Zampini       PetscInt j;
20430cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20440cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20459566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20460cd7f59aSBarry Smith       }
20470cd7f59aSBarry Smith     }
2048dfb205c3SBarry Smith   }
2049dfb205c3SBarry Smith 
20509566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20513bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20523bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20549566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20553bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20563bd0feecSPierre Jolivet 
20579566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2058dfb205c3SBarry Smith   PetscFunctionReturn(0);
2059dfb205c3SBarry Smith }
2060dfb205c3SBarry Smith 
20610bad9183SKris Buschelman /*MC
2062fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2063828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2064828413b8SBarry Smith    the matrix is stored.
2065828413b8SBarry Smith 
2066828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
206711a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20680bad9183SKris Buschelman 
20690bad9183SKris Buschelman    Options Database Keys:
207011a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20710bad9183SKris Buschelman 
207211a5261eSBarry Smith    Note:
2073476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2074476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2075476417e5SBarry Smith 
20760bad9183SKris Buschelman    Level: beginner
20770bad9183SKris Buschelman 
207811a5261eSBarry Smith .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20790bad9183SKris Buschelman M*/
20800bad9183SKris Buschelman 
2081d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2082d71ae5a4SJacob Faibussowitsch {
2083b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
208494ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2085b5df2d14SHong Zhang 
2086b5df2d14SHong Zhang   PetscFunctionBegin;
20874dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2088b0a32e0cSBarry Smith   B->data = (void *)b;
20899566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2090b5df2d14SHong Zhang 
2091b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2092b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2093b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2094b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
209526fbe8dcSKarl Rupp 
20969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
20979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2098b5df2d14SHong Zhang 
2099b5df2d14SHong Zhang   /* build local table of row and column ownerships */
21009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2101b5df2d14SHong Zhang 
2102b5df2d14SHong Zhang   /* build cache for off array entries formed */
21039566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
210426fbe8dcSKarl Rupp 
2105b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21060298fd71SBarry Smith   b->colmap      = NULL;
21070298fd71SBarry Smith   b->garray      = NULL;
2108b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2109b5df2d14SHong Zhang 
2110b5df2d14SHong Zhang   /* stuff used in block assembly */
2111f4259b30SLisandro Dalcin   b->barray = NULL;
2112b5df2d14SHong Zhang 
2113b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2114f4259b30SLisandro Dalcin   b->lvec    = NULL;
2115f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2116f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2117f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2118f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2119f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2120f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2121f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2122b5df2d14SHong Zhang 
2123b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2124f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2125f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2126b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2127b5df2d14SHong Zhang 
2128b5df2d14SHong Zhang   /* hash table stuff */
2129f4259b30SLisandro Dalcin   b->ht           = NULL;
2130f4259b30SLisandro Dalcin   b->hd           = NULL;
2131b5df2d14SHong Zhang   b->ht_size      = 0;
2132b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2133b5df2d14SHong Zhang   b->ht_fact      = 0;
2134b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2135b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2136b5df2d14SHong Zhang 
21377dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21387a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21397a868f3eSHong Zhang 
2140f4259b30SLisandro Dalcin   b->in_loc = NULL;
2141f4259b30SLisandro Dalcin   b->v_loc  = NULL;
214259ffdab8SBarry Smith   b->n_loc  = 0;
214394ae4db5SBarry Smith 
21449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21486214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21506214f412SHong Zhang #endif
2151d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2153d24d4204SJose E. Roman #endif
21549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2156aa5a9175SDahai Guo 
2157b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2158b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2159b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2160b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2161eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2162b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2163eb1ec7c1SStefano Zampini #else
2164b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2165eb1ec7c1SStefano Zampini #endif
216613647f61SHong Zhang 
21679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2168d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21699566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
217094ae4db5SBarry Smith   if (flg) {
217194ae4db5SBarry Smith     PetscReal fact = 1.39;
21729566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21739566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
217494ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21759566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21769566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
217794ae4db5SBarry Smith   }
2178d0609cedSBarry Smith   PetscOptionsEnd();
2179b5df2d14SHong Zhang   PetscFunctionReturn(0);
2180b5df2d14SHong Zhang }
2181b5df2d14SHong Zhang 
2182209238afSKris Buschelman /*MC
2183002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2184209238afSKris Buschelman 
218511a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
218611a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2187209238afSKris Buschelman 
218811a5261eSBarry Smith    Options Database Key:
2189209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2190209238afSKris Buschelman 
2191209238afSKris Buschelman   Level: beginner
2192209238afSKris Buschelman 
219311a5261eSBarry Smith .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2194209238afSKris Buschelman M*/
2195209238afSKris Buschelman 
2196b5df2d14SHong Zhang /*@C
2197b5df2d14SHong Zhang    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2198b5df2d14SHong Zhang    the user should preallocate the matrix storage by setting the parameters
2199b5df2d14SHong Zhang    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2200b5df2d14SHong Zhang    performance can be increased by more than a factor of 50.
2201b5df2d14SHong Zhang 
220211a5261eSBarry Smith    Collective on B
2203b5df2d14SHong Zhang 
2204b5df2d14SHong Zhang    Input Parameters:
22051c4f3114SJed Brown +  B - the matrix
2206bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2207bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2208b5df2d14SHong Zhang .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2209b5df2d14SHong Zhang            submatrix  (same for all local rows)
2210b5df2d14SHong Zhang .  d_nnz - array containing the number of block nonzeros in the various block rows
22116d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22120298fd71SBarry Smith            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
221395742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2214b5df2d14SHong Zhang .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2215b5df2d14SHong Zhang            submatrix (same for all local rows).
2216b5df2d14SHong Zhang -  o_nnz - array containing the number of nonzeros in the various block rows of the
2217c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22180298fd71SBarry Smith            (possibly different for each block row) or NULL.
2219b5df2d14SHong Zhang 
2220b5df2d14SHong Zhang    Options Database Keys:
2221a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2222b5df2d14SHong Zhang                      block calculations (much slower)
2223a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
2224b5df2d14SHong Zhang 
2225b5df2d14SHong Zhang    Notes:
2226b5df2d14SHong Zhang 
222711a5261eSBarry Smith    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2228b5df2d14SHong Zhang    than it must be used on all processors that share the object for that argument.
2229b5df2d14SHong Zhang 
223049a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
223149a6f317SBarry Smith 
2232b5df2d14SHong Zhang    Storage Information:
2233b5df2d14SHong Zhang    For a square global matrix we define each processor's diagonal portion
2234b5df2d14SHong Zhang    to be its local rows and the corresponding columns (a square submatrix);
2235b5df2d14SHong Zhang    each processor's off-diagonal portion encompasses the remainder of the
2236b5df2d14SHong Zhang    local matrix (a rectangular submatrix).
2237b5df2d14SHong Zhang 
2238b5df2d14SHong Zhang    The user can specify preallocated storage for the diagonal part of
2239b5df2d14SHong Zhang    the local submatrix with either d_nz or d_nnz (not both).  Set
224011a5261eSBarry Smith    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2241b5df2d14SHong Zhang    memory allocation.  Likewise, specify preallocated storage for the
2242b5df2d14SHong Zhang    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2243b5df2d14SHong Zhang 
224411a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2245aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2246aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2247aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2248aa95bbe8SBarry Smith 
2249b5df2d14SHong Zhang    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2250b5df2d14SHong Zhang    the figure below we depict these three local rows and all columns (0-11).
2251b5df2d14SHong Zhang 
2252b5df2d14SHong Zhang .vb
2253b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2254a4b1a0f6SJed Brown           --------------------------
2255c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2256c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2257c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2258a4b1a0f6SJed Brown           --------------------------
2259b5df2d14SHong Zhang .ve
2260b5df2d14SHong Zhang 
2261b5df2d14SHong Zhang    Thus, any entries in the d locations are stored in the d (diagonal)
2262b5df2d14SHong Zhang    submatrix, and any entries in the o locations are stored in the
22636d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
226411a5261eSBarry Smith    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2265b5df2d14SHong Zhang 
22666d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
22676d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2268c2fc9fa9SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix
2269c2fc9fa9SBarry Smith 
2270b5df2d14SHong Zhang    In general, for PDE problems in which most nonzeros are near the diagonal,
2271b5df2d14SHong Zhang    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2272b5df2d14SHong Zhang    or you will get TERRIBLE performance; see the users' manual chapter on
2273b5df2d14SHong Zhang    matrices.
2274b5df2d14SHong Zhang 
2275b5df2d14SHong Zhang    Level: intermediate
2276b5df2d14SHong Zhang 
227711a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2278b5df2d14SHong Zhang @*/
2279d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2280d71ae5a4SJacob Faibussowitsch {
2281b5df2d14SHong Zhang   PetscFunctionBegin;
22826ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22836ba663aaSJed Brown   PetscValidType(B, 1);
22846ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2285cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2286b5df2d14SHong Zhang   PetscFunctionReturn(0);
2287b5df2d14SHong Zhang }
2288b5df2d14SHong Zhang 
2289a30f8f8cSSatish Balay /*@C
229011a5261eSBarry Smith    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2291a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2292a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2293a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2294a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2295a30f8f8cSSatish Balay 
2296d083f849SBarry Smith    Collective
2297a30f8f8cSSatish Balay 
2298a30f8f8cSSatish Balay    Input Parameters:
2299a30f8f8cSSatish Balay +  comm - MPI communicator
230011a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2301bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
230211a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2303a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2304a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
230511a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2306a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2307a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
230811a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
230911a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2310a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2311a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2312a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
23136d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23140298fd71SBarry Smith            (possibly different for each block block row) or NULL.
231595742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
231695742e49SBarry Smith            set its value even if it is zero.
2317a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2318a30f8f8cSSatish Balay            submatrix (same for all local rows).
2319a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2320a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23210298fd71SBarry Smith            each block row) or NULL.
2322a30f8f8cSSatish Balay 
2323a30f8f8cSSatish Balay    Output Parameter:
2324a30f8f8cSSatish Balay .  A - the matrix
2325a30f8f8cSSatish Balay 
2326a30f8f8cSSatish Balay    Options Database Keys:
2327a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2328a30f8f8cSSatish Balay                      block calculations (much slower)
2329a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2330a2b725a8SWilliam Gropp -   -mat_mpi - use the parallel matrix data structures even on one processor
2331a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2332a30f8f8cSSatish Balay 
233311a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2334f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
233511a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2336175b88e8SBarry Smith 
2337a30f8f8cSSatish Balay    Notes:
2338d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
23396d6d819aSHong Zhang    This matrix type does not support complex Hermitian operation.
2340d1be2dadSMatthew Knepley 
2341a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2342a30f8f8cSSatish Balay    (possibly both).
2343a30f8f8cSSatish Balay 
234411a5261eSBarry Smith    If` PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2345a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2346a30f8f8cSSatish Balay 
234749a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
234849a6f317SBarry Smith 
2349a30f8f8cSSatish Balay    Storage Information:
2350a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2351a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2352a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2353a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2354a30f8f8cSSatish Balay 
2355a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2356a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
23570298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2358a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2359a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2360a30f8f8cSSatish Balay 
2361a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2362a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2363a30f8f8cSSatish Balay 
2364a30f8f8cSSatish Balay .vb
2365a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2366a4b1a0f6SJed Brown           --------------------------
2367c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2368c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2369c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2370a4b1a0f6SJed Brown           --------------------------
2371a30f8f8cSSatish Balay .ve
2372a30f8f8cSSatish Balay 
2373a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2374a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
23756d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
237611a5261eSBarry Smith    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2377a30f8f8cSSatish Balay 
23786d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
23796d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2380a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2381a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2382a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2383a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2384a30f8f8cSSatish Balay    matrices.
2385a30f8f8cSSatish Balay 
2386a30f8f8cSSatish Balay    Level: intermediate
2387a30f8f8cSSatish Balay 
238811a5261eSBarry Smith .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2389a30f8f8cSSatish Balay @*/
2390a30f8f8cSSatish Balay 
2391d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2392d71ae5a4SJacob Faibussowitsch {
23931302d50aSBarry Smith   PetscMPIInt size;
2394a30f8f8cSSatish Balay 
2395a30f8f8cSSatish Balay   PetscFunctionBegin;
23969566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2399273d9f13SBarry Smith   if (size > 1) {
24009566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
24019566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2402273d9f13SBarry Smith   } else {
24039566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
24049566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2405273d9f13SBarry Smith   }
2406a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2407a30f8f8cSSatish Balay }
2408a30f8f8cSSatish Balay 
2409d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2410d71ae5a4SJacob Faibussowitsch {
2411a30f8f8cSSatish Balay   Mat           mat;
2412a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2413d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2414387bc808SHong Zhang   PetscScalar  *array;
2415a30f8f8cSSatish Balay 
2416a30f8f8cSSatish Balay   PetscFunctionBegin;
2417f4259b30SLisandro Dalcin   *newmat = NULL;
241826fbe8dcSKarl Rupp 
24199566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24209566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24219566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2424e1b6402fSHong Zhang 
2425d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2426273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
242782327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24287fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24297fff6886SHong Zhang 
2430b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2431a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2432a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2433a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2434a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2435a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2436a30f8f8cSSatish Balay 
2437a30f8f8cSSatish Balay   a->size         = oldmat->size;
2438a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2439a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2440a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2441f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2442f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2443a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2444f4259b30SLisandro Dalcin   a->barray       = NULL;
2445899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2446899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2447899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2448899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2449a30f8f8cSSatish Balay 
2450a30f8f8cSSatish Balay   /* hash table stuff */
2451f4259b30SLisandro Dalcin   a->ht           = NULL;
2452f4259b30SLisandro Dalcin   a->hd           = NULL;
2453a30f8f8cSSatish Balay   a->ht_size      = 0;
2454a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2455a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2456a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2457a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2458a30f8f8cSSatish Balay 
24599566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2460a30f8f8cSSatish Balay   if (oldmat->colmap) {
2461a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
24629566063dSJacob Faibussowitsch     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2463a30f8f8cSSatish Balay #else
24649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24659566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2466a30f8f8cSSatish Balay #endif
2467f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2468387bc808SHong Zhang 
2469a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24719566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2472f4259b30SLisandro Dalcin   } else a->garray = NULL;
2473a30f8f8cSSatish Balay 
24749566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24759566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24769566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
247782327fa8SHong Zhang 
24789566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24799566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2480387bc808SHong Zhang 
24819566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24829566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24839566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24849566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24879566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
24889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2489387bc808SHong Zhang 
2490387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
24919566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2492387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
249382327fa8SHong Zhang 
24949566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
24959566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
24969566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2497a30f8f8cSSatish Balay   *newmat = mat;
2498a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2499a30f8f8cSSatish Balay }
2500a30f8f8cSSatish Balay 
2501618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2502618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2503618cc2edSLisandro Dalcin 
2504d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2505d71ae5a4SJacob Faibussowitsch {
25067f489da9SVaclav Hapla   PetscBool isbinary;
250795936485SShri Abhyankar 
250895936485SShri Abhyankar   PetscFunctionBegin;
25099566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25105f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25119566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
251295936485SShri Abhyankar   PetscFunctionReturn(0);
251395936485SShri Abhyankar }
251495936485SShri Abhyankar 
2515d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2516d71ae5a4SJacob Faibussowitsch {
251724d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2518f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2519ca54ac64SHong Zhang   PetscReal     atmp;
252087828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25211302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25221302d50aSBarry Smith   PetscMPIInt   rank, size;
25231302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
252487828ca2SBarry Smith   PetscScalar  *va;
25258a1c53f2SBarry Smith   MatScalar    *ba;
2526f4c0e9e4SHong Zhang   MPI_Status    stat;
252724d5174aSHong Zhang 
252824d5174aSHong Zhang   PetscFunctionBegin;
25295f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25309566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25319566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2532f4c0e9e4SHong Zhang 
25339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25349566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2535f4c0e9e4SHong Zhang 
2536d0f46423SBarry Smith   bs  = A->rmap->bs;
2537f4c0e9e4SHong Zhang   mbs = a->mbs;
2538f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2539f4c0e9e4SHong Zhang   ba  = b->a;
2540f4c0e9e4SHong Zhang   bi  = b->i;
2541f4c0e9e4SHong Zhang   bj  = b->j;
2542f4c0e9e4SHong Zhang 
2543f4c0e9e4SHong Zhang   /* find ownerships */
2544d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2545f4c0e9e4SHong Zhang 
2546f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25479566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2548f4c0e9e4SHong Zhang 
2549f4c0e9e4SHong Zhang   /* row_max for B */
2550b8475685SHong Zhang   if (rank != size - 1) {
2551f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25529371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25539371c9d4SSatish Balay       bi++;
2554f4c0e9e4SHong Zhang       brow = bs * i;
2555f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2556f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2557f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2558ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
255904d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2560f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25619371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25629371c9d4SSatish Balay             ba++;
2563ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2564ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2565f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2566f4c0e9e4SHong Zhang           }
2567f4c0e9e4SHong Zhang         }
2568f4c0e9e4SHong Zhang         bj++;
2569f4c0e9e4SHong Zhang       }
2570f4c0e9e4SHong Zhang     }
2571f4c0e9e4SHong Zhang 
2572f4c0e9e4SHong Zhang     /* send values to its owners */
2573f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2574f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2575ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25769566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2577ca54ac64SHong Zhang     }
2578f4c0e9e4SHong Zhang   }
2579f4c0e9e4SHong Zhang 
2580f4c0e9e4SHong Zhang   /* receive values */
2581ca54ac64SHong Zhang   if (rank) {
2582f4c0e9e4SHong Zhang     rvalues = work;
2583ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2584f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25859566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2586f4c0e9e4SHong Zhang       /* process values */
2587f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2588ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2589f4c0e9e4SHong Zhang       }
2590f4c0e9e4SHong Zhang     }
2591ca54ac64SHong Zhang   }
2592f4c0e9e4SHong Zhang 
25939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
25949566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
259524d5174aSHong Zhang   PetscFunctionReturn(0);
259624d5174aSHong Zhang }
25972798e883SHong Zhang 
2598d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2599d71ae5a4SJacob Faibussowitsch {
26002798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2601d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
26023649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2603ffe4fb16SHong Zhang   Vec                bb1;
26043649974fSBarry Smith   const PetscScalar *b;
2605ffe4fb16SHong Zhang 
2606ffe4fb16SHong Zhang   PetscFunctionBegin;
26075f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26085f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2609ffe4fb16SHong Zhang 
2610a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26119566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2612a2b30743SBarry Smith     PetscFunctionReturn(0);
2613a2b30743SBarry Smith   }
2614a2b30743SBarry Smith 
2615ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2616ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26179566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2618ffe4fb16SHong Zhang       its--;
2619ffe4fb16SHong Zhang     }
2620ffe4fb16SHong Zhang 
26219566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2622ffe4fb16SHong Zhang     while (its--) {
2623ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26249566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2625ffe4fb16SHong Zhang 
2626ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26279566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26289566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26299566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26309566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2631ffe4fb16SHong Zhang 
26329566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2633ffe4fb16SHong Zhang 
2634ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26359566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26369566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26379566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26389566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2639ffe4fb16SHong Zhang 
2640ffe4fb16SHong Zhang       /* set slvec1b = 0 */
26419566063dSJacob Faibussowitsch       PetscCall(VecSet(mat->slvec1b, 0.0));
2642ffe4fb16SHong Zhang 
26439566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26449566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26459566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26469566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2647ffe4fb16SHong Zhang 
2648ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26499566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2650ffe4fb16SHong Zhang 
2651ffe4fb16SHong Zhang       /* local diagonal sweep */
26529566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2653ffe4fb16SHong Zhang     }
26549566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2655fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26569566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2657fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26589566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2659fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2660fa22f6d0SBarry Smith     Vec                xx1;
2661ace3abfcSBarry Smith     PetscBool          hasop;
266220f1ed55SBarry Smith     const PetscScalar *diag;
2663887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
266420f1ed55SBarry Smith     PetscInt           i, n;
2665fa22f6d0SBarry Smith 
2666fa22f6d0SBarry Smith     if (!mat->xx1) {
26679566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26689566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2669fa22f6d0SBarry Smith     }
2670fa22f6d0SBarry Smith     xx1 = mat->xx1;
2671fa22f6d0SBarry Smith     bb1 = mat->bb1;
2672fa22f6d0SBarry Smith 
26739566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2674fa22f6d0SBarry Smith 
2675fa22f6d0SBarry Smith     if (!mat->diag) {
2676effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26779566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26789566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2679fa22f6d0SBarry Smith     }
26809566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2681fa22f6d0SBarry Smith 
2682fa22f6d0SBarry Smith     if (hasop) {
26839566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26849566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
268520f1ed55SBarry Smith     } else {
268620f1ed55SBarry Smith       /*
268720f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
26889566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
26899566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
269020f1ed55SBarry Smith       */
26919566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
26929566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
26939566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26949566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26959566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2696887ee2caSBarry Smith       if (omega == 1.0) {
269726fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
26989566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2699887ee2caSBarry Smith       } else {
270026fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
27019566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2702887ee2caSBarry Smith       }
27039566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
27049566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27059566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27069566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
270720f1ed55SBarry Smith     }
2708fa22f6d0SBarry Smith 
2709fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
27109566063dSJacob Faibussowitsch     PetscCall(VecSet(mat->slvec1b, 0.0));
27119566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27129566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27139566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27149566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27169566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27179566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27189566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27199566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2720fa22f6d0SBarry Smith 
2721fa22f6d0SBarry Smith     /* local sweep */
27229566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27239566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2724f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2725ffe4fb16SHong Zhang   PetscFunctionReturn(0);
2726ffe4fb16SHong Zhang }
2727ffe4fb16SHong Zhang 
2728dfb205c3SBarry Smith /*@
272911a5261eSBarry Smith      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2730dfb205c3SBarry Smith          CSR format the local rows.
2731dfb205c3SBarry Smith 
2732d083f849SBarry Smith    Collective
2733dfb205c3SBarry Smith 
2734dfb205c3SBarry Smith    Input Parameters:
2735dfb205c3SBarry Smith +  comm - MPI communicator
2736dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
273711a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2738dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
273911a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2740dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
274111a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
274211a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2743483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2744dfb205c3SBarry Smith .   j - column indices
2745dfb205c3SBarry Smith -   a - matrix values
2746dfb205c3SBarry Smith 
2747dfb205c3SBarry Smith    Output Parameter:
2748dfb205c3SBarry Smith .   mat - the matrix
2749dfb205c3SBarry Smith 
2750dfb205c3SBarry Smith    Level: intermediate
2751dfb205c3SBarry Smith 
2752dfb205c3SBarry Smith    Notes:
2753dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2754dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2755dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2756dfb205c3SBarry Smith 
2757dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2758dfb205c3SBarry Smith 
275911a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2760db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2761dfb205c3SBarry Smith @*/
2762d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2763d71ae5a4SJacob Faibussowitsch {
2764dfb205c3SBarry Smith   PetscFunctionBegin;
27655f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27665f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27679566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27689566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27699566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27709566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2771dfb205c3SBarry Smith   PetscFunctionReturn(0);
2772dfb205c3SBarry Smith }
2773dfb205c3SBarry Smith 
2774dfb205c3SBarry Smith /*@C
277511a5261eSBarry Smith    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2776dfb205c3SBarry Smith 
2777d083f849SBarry Smith    Collective
2778dfb205c3SBarry Smith 
2779dfb205c3SBarry Smith    Input Parameters:
27801c4f3114SJed Brown +  B - the matrix
2781dfb205c3SBarry Smith .  bs - the block size
2782dfb205c3SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2783dfb205c3SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2784dfb205c3SBarry Smith -  v - optional values in the matrix
2785dfb205c3SBarry Smith 
2786664954b6SBarry Smith    Level: advanced
2787664954b6SBarry Smith 
2788664954b6SBarry Smith    Notes:
27890cd7f59aSBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
27900cd7f59aSBarry Smith    and usually the numerical values as well
27910cd7f59aSBarry Smith 
279250c5228eSBarry Smith    Any entries below the diagonal are ignored
2793dfb205c3SBarry Smith 
279411a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2795dfb205c3SBarry Smith @*/
2796d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2797d71ae5a4SJacob Faibussowitsch {
2798dfb205c3SBarry Smith   PetscFunctionBegin;
2799cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2800dfb205c3SBarry Smith   PetscFunctionReturn(0);
2801dfb205c3SBarry Smith }
2802dfb205c3SBarry Smith 
2803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2804d71ae5a4SJacob Faibussowitsch {
280510c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
280610c56fdeSHong Zhang   PetscInt    *indx;
280710c56fdeSHong Zhang   PetscScalar *values;
2808dfb205c3SBarry Smith 
28094dcd73b1SHong Zhang   PetscFunctionBegin;
28109566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
281110c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
281210c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2813de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
281410c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2815de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28164dcd73b1SHong Zhang 
28179566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28189371c9d4SSatish Balay     mbs = m / bs;
28199371c9d4SSatish Balay     Nbs = N / cbs;
282048a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2821da91a574SPierre Jolivet     nbs = n / cbs;
28224dcd73b1SHong Zhang 
28239566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2824d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2825de25e9cbSPierre Jolivet 
28269566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28279566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2828de25e9cbSPierre Jolivet     if (rank == size - 1) {
2829de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28305f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2831de25e9cbSPierre Jolivet     }
2832de25e9cbSPierre Jolivet 
2833d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28349566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
283510c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28369566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28374dcd73b1SHong Zhang       nnz = nnz / bs;
28384dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28399566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28409566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28414dcd73b1SHong Zhang     }
28429566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28439566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28444dcd73b1SHong Zhang 
28459566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28469566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28479566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28489566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28499566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28509566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2851d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28524dcd73b1SHong Zhang   }
28534dcd73b1SHong Zhang 
285410c56fdeSHong Zhang   /* numeric phase */
28559566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28569566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28574dcd73b1SHong Zhang 
28589566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28594dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28609566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28614dcd73b1SHong Zhang     Ii = i + rstart;
28629566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28639566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28644dcd73b1SHong Zhang   }
28659566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28669566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28679566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28684dcd73b1SHong Zhang   PetscFunctionReturn(0);
28694dcd73b1SHong Zhang }
2870