xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 23a3927d5baa326da5fa80231485e7e64d9ba13a)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
5a30f8f8cSSatish Balay 
66214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
7cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
86214f412SHong Zhang #endif
9d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
10d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
11d24d4204SJose E. Roman #endif
12b147fbf3SStefano Zampini 
13b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
14d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
15d71ae5a4SJacob Faibussowitsch {
16b147fbf3SStefano Zampini   Mat       preallocator;
17b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
18b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
19b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
20b147fbf3SStefano Zampini 
21b147fbf3SStefano Zampini   PetscFunctionBegin;
22b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
23b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
24b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
25b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
269566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
275f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
28b147fbf3SStefano Zampini   }
29b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
309566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
319566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
329566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
339566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
349566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
359566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
369566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
379566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
389566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
39b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
40b147fbf3SStefano Zampini     PetscInt           ncols;
41b147fbf3SStefano Zampini     const PetscInt    *row;
42b147fbf3SStefano Zampini     const PetscScalar *vals;
43b147fbf3SStefano Zampini 
44b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
459566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
469566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
4748a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
489566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
49b147fbf3SStefano Zampini     }
50b147fbf3SStefano Zampini   }
519566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
539566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
549566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56b147fbf3SStefano Zampini }
57b147fbf3SStefano Zampini 
58d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
59d71ae5a4SJacob Faibussowitsch {
60b147fbf3SStefano Zampini   Mat      B;
61b147fbf3SStefano Zampini   PetscInt r;
62b147fbf3SStefano Zampini 
63b147fbf3SStefano Zampini   PetscFunctionBegin;
64b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
6528d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
66b147fbf3SStefano Zampini     PetscInt  bs;
67b147fbf3SStefano Zampini 
689566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
699566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
709566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
719566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
729566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
739566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
759566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
7628d58a37SPierre Jolivet     if (!isdense) {
779566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
789566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
799566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
8028d58a37SPierre Jolivet     } else {
819566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
8228d58a37SPierre Jolivet     }
8328d58a37SPierre Jolivet   } else {
8428d58a37SPierre Jolivet     B = *newmat;
859566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
8628d58a37SPierre Jolivet   }
87b147fbf3SStefano Zampini 
889566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
89b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
90b147fbf3SStefano Zampini     PetscInt           ncols;
91b147fbf3SStefano Zampini     const PetscInt    *row;
92b147fbf3SStefano Zampini     const PetscScalar *vals;
93b147fbf3SStefano Zampini 
949566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
959566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
96eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
97b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
98eb1ec7c1SStefano Zampini       PetscInt i;
9948a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
100eb1ec7c1SStefano Zampini     } else {
1019566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
102eb1ec7c1SStefano Zampini     }
103eb1ec7c1SStefano Zampini #else
1049566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
105eb1ec7c1SStefano Zampini #endif
1069566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
107b147fbf3SStefano Zampini   }
1089566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1099566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
111b147fbf3SStefano Zampini 
112b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1139566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
114b147fbf3SStefano Zampini   } else {
115b147fbf3SStefano Zampini     *newmat = B;
116b147fbf3SStefano Zampini   }
1173ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
118b147fbf3SStefano Zampini }
119b147fbf3SStefano Zampini 
120d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
121d71ae5a4SJacob Faibussowitsch {
122f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
123a30f8f8cSSatish Balay 
124a30f8f8cSSatish Balay   PetscFunctionBegin;
1259566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1269566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
1273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
128a30f8f8cSSatish Balay }
129a30f8f8cSSatish Balay 
130d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
131d71ae5a4SJacob Faibussowitsch {
132f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
133a30f8f8cSSatish Balay 
134a30f8f8cSSatish Balay   PetscFunctionBegin;
1359566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1369566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
1373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
138a30f8f8cSSatish Balay }
139a30f8f8cSSatish Balay 
140d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
141a30f8f8cSSatish Balay   { \
142a30f8f8cSSatish Balay     brow = row / bs; \
1439371c9d4SSatish Balay     rp   = aj + ai[brow]; \
1449371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
1459371c9d4SSatish Balay     rmax = aimax[brow]; \
1469371c9d4SSatish Balay     nrow = ailen[brow]; \
147a30f8f8cSSatish Balay     bcol = col / bs; \
1489371c9d4SSatish Balay     ridx = row % bs; \
1499371c9d4SSatish Balay     cidx = col % bs; \
1509371c9d4SSatish Balay     low  = 0; \
1519371c9d4SSatish Balay     high = nrow; \
152a30f8f8cSSatish Balay     while (high - low > 3) { \
153a30f8f8cSSatish Balay       t = (low + high) / 2; \
154a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
155a30f8f8cSSatish Balay       else low = t; \
156a30f8f8cSSatish Balay     } \
157a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
158a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
159a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
160a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
161a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
162a30f8f8cSSatish Balay         else *bap = value; \
163a30f8f8cSSatish Balay         goto a_noinsert; \
164a30f8f8cSSatish Balay       } \
165a30f8f8cSSatish Balay     } \
166a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
1675f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
168fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
169a30f8f8cSSatish Balay     N = nrow++ - 1; \
170a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
1719566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
1729566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
1739566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
174a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
175a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
176e56f5c9eSBarry Smith     A->nonzerostate++; \
177a30f8f8cSSatish Balay   a_noinsert:; \
178a30f8f8cSSatish Balay     ailen[brow] = nrow; \
179a30f8f8cSSatish Balay   }
180e5e170daSBarry Smith 
181d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
182a30f8f8cSSatish Balay   { \
183a30f8f8cSSatish Balay     brow = row / bs; \
1849371c9d4SSatish Balay     rp   = bj + bi[brow]; \
1859371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
1869371c9d4SSatish Balay     rmax = bimax[brow]; \
1879371c9d4SSatish Balay     nrow = bilen[brow]; \
188a30f8f8cSSatish Balay     bcol = col / bs; \
1899371c9d4SSatish Balay     ridx = row % bs; \
1909371c9d4SSatish Balay     cidx = col % bs; \
1919371c9d4SSatish Balay     low  = 0; \
1929371c9d4SSatish Balay     high = nrow; \
193a30f8f8cSSatish Balay     while (high - low > 3) { \
194a30f8f8cSSatish Balay       t = (low + high) / 2; \
195a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
196a30f8f8cSSatish Balay       else low = t; \
197a30f8f8cSSatish Balay     } \
198a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
199a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
200a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
201a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
202a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
203a30f8f8cSSatish Balay         else *bap = value; \
204a30f8f8cSSatish Balay         goto b_noinsert; \
205a30f8f8cSSatish Balay       } \
206a30f8f8cSSatish Balay     } \
207a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2085f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
209fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
210a30f8f8cSSatish Balay     N = nrow++ - 1; \
211a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2129566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2139566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2149566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
215a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
216a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
217e56f5c9eSBarry Smith     B->nonzerostate++; \
218a30f8f8cSSatish Balay   b_noinsert:; \
219a30f8f8cSSatish Balay     bilen[brow] = nrow; \
220a30f8f8cSSatish Balay   }
221a30f8f8cSSatish Balay 
222a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
223da81f932SPierre Jolivet    Any a(i,j) with i>j input by user is ignored or generates an error
224a30f8f8cSSatish Balay */
225d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
226d71ae5a4SJacob Faibussowitsch {
227a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
228a30f8f8cSSatish Balay   MatScalar     value;
229ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2301302d50aSBarry Smith   PetscInt      i, j, row, col;
231d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
232d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
233d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
234a30f8f8cSSatish Balay 
235a30f8f8cSSatish Balay   /* Some Variables required in the macro */
236a30f8f8cSSatish Balay   Mat           A     = baij->A;
237a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
2381302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
239a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
240a30f8f8cSSatish Balay 
241a30f8f8cSSatish Balay   Mat          B     = baij->B;
242a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
2431302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
244a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
245a30f8f8cSSatish Balay 
2461302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
2471302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
248a30f8f8cSSatish Balay   MatScalar *ap, *bap;
249a30f8f8cSSatish Balay 
250a30f8f8cSSatish Balay   /* for stash */
2510298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
2520298fd71SBarry Smith   MatScalar *v_loc = NULL;
253a30f8f8cSSatish Balay 
254a30f8f8cSSatish Balay   PetscFunctionBegin;
255a30f8f8cSSatish Balay   if (!baij->donotstash) {
25659ffdab8SBarry Smith     if (n > baij->n_loc) {
2579566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
2589566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
2599566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
2609566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
26126fbe8dcSKarl Rupp 
26259ffdab8SBarry Smith       baij->n_loc = n;
26359ffdab8SBarry Smith     }
26459ffdab8SBarry Smith     in_loc = baij->in_loc;
26559ffdab8SBarry Smith     v_loc  = baij->v_loc;
266a30f8f8cSSatish Balay   }
267a30f8f8cSSatish Balay 
268a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
269a30f8f8cSSatish Balay     if (im[i] < 0) continue;
2705f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
271a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
272a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
273a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
27401b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
27501b2bd88SHong Zhang           if (a->ignore_ltriangular) {
27601b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
27726fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
27801b2bd88SHong Zhang         }
279a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
280a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
2819371c9d4SSatish Balay           brow = row / bs;
2829371c9d4SSatish Balay           bcol = col / bs;
283a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
284db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
285db4deed7SKarl Rupp           else value = v[i + j * m];
286d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
2879566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
288f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
289f7d195e4SLawrence Mitchell           continue;
290f7d195e4SLawrence Mitchell         } else {
291f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
292f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
293a30f8f8cSSatish Balay           if (mat->was_assembled) {
29448a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
295a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
296eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
29771730473SSatish Balay             col = col - 1;
298a30f8f8cSSatish Balay #else
29971730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
300a30f8f8cSSatish Balay #endif
301a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3029566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
303a30f8f8cSSatish Balay               col = in[j];
304a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
305a30f8f8cSSatish Balay               B     = baij->B;
306a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3079371c9d4SSatish Balay               bimax = b->imax;
3089371c9d4SSatish Balay               bi    = b->i;
3099371c9d4SSatish Balay               bilen = b->ilen;
3109371c9d4SSatish Balay               bj    = b->j;
311a30f8f8cSSatish Balay               ba    = b->a;
31271730473SSatish Balay             } else col += in[j] % bs;
313a30f8f8cSSatish Balay           } else col = in[j];
314db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
315db4deed7SKarl Rupp           else value = v[i + j * m];
316d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3179566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
318a30f8f8cSSatish Balay         }
319a30f8f8cSSatish Balay       }
320a30f8f8cSSatish Balay     } else { /* off processor entry */
3215f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
322a30f8f8cSSatish Balay       if (!baij->donotstash) {
3235080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
324a30f8f8cSSatish Balay         n_loc          = 0;
325a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
326f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
327a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
328a30f8f8cSSatish Balay           if (roworiented) {
329a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
330a30f8f8cSSatish Balay           } else {
331a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
332a30f8f8cSSatish Balay           }
333a30f8f8cSSatish Balay           n_loc++;
334a30f8f8cSSatish Balay         }
3359566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
336a30f8f8cSSatish Balay       }
337a30f8f8cSSatish Balay     }
338a30f8f8cSSatish Balay   }
3393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
340a30f8f8cSSatish Balay }
341a30f8f8cSSatish Balay 
342d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
343d71ae5a4SJacob Faibussowitsch {
34436bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
34536bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
34636bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
34736bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
34836bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
34936bd2089SBarry Smith   const PetscScalar *value       = v;
35036bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
35136bd2089SBarry Smith 
35236bd2089SBarry Smith   PetscFunctionBegin;
35336bd2089SBarry Smith   if (col < row) {
3543ba16761SJacob Faibussowitsch     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
35536bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
35636bd2089SBarry Smith   }
35736bd2089SBarry Smith   rp    = aj + ai[row];
35836bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
35936bd2089SBarry Smith   rmax  = imax[row];
36036bd2089SBarry Smith   nrow  = ailen[row];
36136bd2089SBarry Smith   value = v;
36236bd2089SBarry Smith   low   = 0;
36336bd2089SBarry Smith   high  = nrow;
36436bd2089SBarry Smith 
36536bd2089SBarry Smith   while (high - low > 7) {
36636bd2089SBarry Smith     t = (low + high) / 2;
36736bd2089SBarry Smith     if (rp[t] > col) high = t;
36836bd2089SBarry Smith     else low = t;
36936bd2089SBarry Smith   }
37036bd2089SBarry Smith   for (i = low; i < high; i++) {
37136bd2089SBarry Smith     if (rp[i] > col) break;
37236bd2089SBarry Smith     if (rp[i] == col) {
37336bd2089SBarry Smith       bap = ap + bs2 * i;
37436bd2089SBarry Smith       if (roworiented) {
37536bd2089SBarry Smith         if (is == ADD_VALUES) {
37636bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
377ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
37836bd2089SBarry Smith           }
37936bd2089SBarry Smith         } else {
38036bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
381ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
38236bd2089SBarry Smith           }
38336bd2089SBarry Smith         }
38436bd2089SBarry Smith       } else {
38536bd2089SBarry Smith         if (is == ADD_VALUES) {
38636bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
387ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
38836bd2089SBarry Smith           }
38936bd2089SBarry Smith         } else {
39036bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
391ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
39236bd2089SBarry Smith           }
39336bd2089SBarry Smith         }
39436bd2089SBarry Smith       }
39536bd2089SBarry Smith       goto noinsert2;
39636bd2089SBarry Smith     }
39736bd2089SBarry Smith   }
39836bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
3995f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
40036bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4019371c9d4SSatish Balay   N = nrow++ - 1;
4029371c9d4SSatish Balay   high++;
40336bd2089SBarry Smith   /* shift up all the later entries in this row */
4049566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4059566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
40636bd2089SBarry Smith   rp[i] = col;
40736bd2089SBarry Smith   bap   = ap + bs2 * i;
40836bd2089SBarry Smith   if (roworiented) {
40936bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
410ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
41136bd2089SBarry Smith     }
41236bd2089SBarry Smith   } else {
41336bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
414ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
41536bd2089SBarry Smith     }
41636bd2089SBarry Smith   }
41736bd2089SBarry Smith noinsert2:;
41836bd2089SBarry Smith   ailen[row] = nrow;
4193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
42036bd2089SBarry Smith }
42136bd2089SBarry Smith 
42236bd2089SBarry Smith /*
42336bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
42436bd2089SBarry Smith */
425d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
426d71ae5a4SJacob Faibussowitsch {
42736bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
42836bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
42936bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
43036bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
43136bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
43236bd2089SBarry Smith   const PetscScalar *value       = v;
43336bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
43436bd2089SBarry Smith 
43536bd2089SBarry Smith   PetscFunctionBegin;
43636bd2089SBarry Smith   rp    = aj + ai[row];
43736bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
43836bd2089SBarry Smith   rmax  = imax[row];
43936bd2089SBarry Smith   nrow  = ailen[row];
44036bd2089SBarry Smith   low   = 0;
44136bd2089SBarry Smith   high  = nrow;
44236bd2089SBarry Smith   value = v;
44336bd2089SBarry Smith   while (high - low > 7) {
44436bd2089SBarry Smith     t = (low + high) / 2;
44536bd2089SBarry Smith     if (rp[t] > col) high = t;
44636bd2089SBarry Smith     else low = t;
44736bd2089SBarry Smith   }
44836bd2089SBarry Smith   for (i = low; i < high; i++) {
44936bd2089SBarry Smith     if (rp[i] > col) break;
45036bd2089SBarry Smith     if (rp[i] == col) {
45136bd2089SBarry Smith       bap = ap + bs2 * i;
45236bd2089SBarry Smith       if (roworiented) {
45336bd2089SBarry Smith         if (is == ADD_VALUES) {
45436bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
455ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
45636bd2089SBarry Smith           }
45736bd2089SBarry Smith         } else {
45836bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
459ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
46036bd2089SBarry Smith           }
46136bd2089SBarry Smith         }
46236bd2089SBarry Smith       } else {
46336bd2089SBarry Smith         if (is == ADD_VALUES) {
46436bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
465ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
46636bd2089SBarry Smith             bap += bs;
46736bd2089SBarry Smith           }
46836bd2089SBarry Smith         } else {
46936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
470ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
47136bd2089SBarry Smith             bap += bs;
47236bd2089SBarry Smith           }
47336bd2089SBarry Smith         }
47436bd2089SBarry Smith       }
47536bd2089SBarry Smith       goto noinsert2;
47636bd2089SBarry Smith     }
47736bd2089SBarry Smith   }
47836bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4795f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
48036bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4819371c9d4SSatish Balay   N = nrow++ - 1;
4829371c9d4SSatish Balay   high++;
48336bd2089SBarry Smith   /* shift up all the later entries in this row */
4849566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4859566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
48636bd2089SBarry Smith   rp[i] = col;
48736bd2089SBarry Smith   bap   = ap + bs2 * i;
48836bd2089SBarry Smith   if (roworiented) {
48936bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
490ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
49136bd2089SBarry Smith     }
49236bd2089SBarry Smith   } else {
49336bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
494ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
49536bd2089SBarry Smith     }
49636bd2089SBarry Smith   }
49736bd2089SBarry Smith noinsert2:;
49836bd2089SBarry Smith   ailen[row] = nrow;
4993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
50036bd2089SBarry Smith }
50136bd2089SBarry Smith 
50236bd2089SBarry Smith /*
50336bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
50436bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
50536bd2089SBarry Smith */
506d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
507d71ae5a4SJacob Faibussowitsch {
5080880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
509f15d580aSBarry Smith   const MatScalar *value;
510f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
511ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
512899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
513476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
514476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5150880e062SHong Zhang 
516a30f8f8cSSatish Balay   PetscFunctionBegin;
5170880e062SHong Zhang   if (!barray) {
5189566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5190880e062SHong Zhang     baij->barray = barray;
5200880e062SHong Zhang   }
5210880e062SHong Zhang 
5220880e062SHong Zhang   if (roworiented) {
5230880e062SHong Zhang     stepval = (n - 1) * bs;
5240880e062SHong Zhang   } else {
5250880e062SHong Zhang     stepval = (m - 1) * bs;
5260880e062SHong Zhang   }
5270880e062SHong Zhang   for (i = 0; i < m; i++) {
5280880e062SHong Zhang     if (im[i] < 0) continue;
5296bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5300880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5310880e062SHong Zhang       row = im[i] - rstart;
5320880e062SHong Zhang       for (j = 0; j < n; j++) {
533f3f98c53SJed Brown         if (im[i] > in[j]) {
534f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
535e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
536f3f98c53SJed Brown         }
5370880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
5380880e062SHong Zhang         if ((roworiented) && (n == 1)) {
539f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
5400880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
541f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
5420880e062SHong Zhang         } else { /* Here a copy is required */
5430880e062SHong Zhang           if (roworiented) {
5440880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
5450880e062SHong Zhang           } else {
5460880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
5470880e062SHong Zhang           }
5480880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
549ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
5500880e062SHong Zhang           }
5510880e062SHong Zhang           barray -= bs2;
5520880e062SHong Zhang         }
5530880e062SHong Zhang 
5540880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
5550880e062SHong Zhang           col = in[j] - cstart;
5569566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
557f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
558f7d195e4SLawrence Mitchell           continue;
559f7d195e4SLawrence Mitchell         } else {
560f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
5610880e062SHong Zhang           if (mat->was_assembled) {
56248a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
5630880e062SHong Zhang 
5642515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
5650880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
5669371c9d4SSatish Balay             {
5679371c9d4SSatish Balay               PetscInt data;
568eec179cfSJacob Faibussowitsch               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
56908401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5700880e062SHong Zhang             }
5710880e062SHong Zhang   #else
57208401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5730880e062SHong Zhang   #endif
5740880e062SHong Zhang #endif
5750880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
576eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
5770880e062SHong Zhang             col = (col - 1) / bs;
5780880e062SHong Zhang #else
5790880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
5800880e062SHong Zhang #endif
5810880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
5829566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
5830880e062SHong Zhang               col = in[j];
5840880e062SHong Zhang             }
58526fbe8dcSKarl Rupp           } else col = in[j];
5869566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
5870880e062SHong Zhang         }
5880880e062SHong Zhang       }
5890880e062SHong Zhang     } else {
5905f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
5910880e062SHong Zhang       if (!baij->donotstash) {
5920880e062SHong Zhang         if (roworiented) {
5939566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5940880e062SHong Zhang         } else {
5959566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5960880e062SHong Zhang         }
5970880e062SHong Zhang       }
5980880e062SHong Zhang     }
5990880e062SHong Zhang   }
6003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
601a30f8f8cSSatish Balay }
602a30f8f8cSSatish Balay 
603d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
604d71ae5a4SJacob Faibussowitsch {
605f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
606d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
607d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
608a30f8f8cSSatish Balay 
609a30f8f8cSSatish Balay   PetscFunctionBegin;
610a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
61154c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
61254c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
613a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
614a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
615a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
61654c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
61754c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
618a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
619a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6209566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
621a30f8f8cSSatish Balay         } else {
62248a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
623a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
624eec179cfSJacob Faibussowitsch           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
625a30f8f8cSSatish Balay           data--;
626a30f8f8cSSatish Balay #else
627a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
628a30f8f8cSSatish Balay #endif
629a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
630a30f8f8cSSatish Balay           else {
631a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6329566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
633a30f8f8cSSatish Balay           }
634a30f8f8cSSatish Balay         }
635a30f8f8cSSatish Balay       }
636f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
637a30f8f8cSSatish Balay   }
6383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
639a30f8f8cSSatish Balay }
640a30f8f8cSSatish Balay 
641d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
642d71ae5a4SJacob Faibussowitsch {
643a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
644a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
645a30f8f8cSSatish Balay 
646a30f8f8cSSatish Balay   PetscFunctionBegin;
647a30f8f8cSSatish Balay   if (baij->size == 1) {
6489566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
649a30f8f8cSSatish Balay   } else {
650a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
6519566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
6529566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
6539371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6549371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
6559566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
6569371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6579371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
6581c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
6598f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
6609566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
6610b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
6620b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
6630b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
6640b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
665899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
666d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
6670b8dc8d2SHong Zhang       MatScalar    *v;
6680b8dc8d2SHong Zhang 
6699566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
6709566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
6710b8dc8d2SHong Zhang       /* Amat */
6729371c9d4SSatish Balay       v  = amat->a;
6739371c9d4SSatish Balay       jj = amat->j;
6740b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6750b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6760b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
6770b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
6789371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
6799371c9d4SSatish Balay           jj++;
6800b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
6810b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
6829371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
6839371c9d4SSatish Balay               v++;
6840b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
6850b8dc8d2SHong Zhang               /* non-diagonal block */
6860b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
6870b8dc8d2SHong Zhang             }
6880b8dc8d2SHong Zhang           }
6890b8dc8d2SHong Zhang         }
6909566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
6910b8dc8d2SHong Zhang       }
6920b8dc8d2SHong Zhang       /* Bmat */
6939371c9d4SSatish Balay       v  = bmat->a;
6949371c9d4SSatish Balay       jj = bmat->j;
6950b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6960b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6970b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
6980b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
6999371c9d4SSatish Balay           gcol = bs * garray[*jj];
7009371c9d4SSatish Balay           jj++;
7010b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7020b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7039371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7049371c9d4SSatish Balay               v++;
7050b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7060b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7070b8dc8d2SHong Zhang             }
7080b8dc8d2SHong Zhang           }
7090b8dc8d2SHong Zhang         }
7109566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7110b8dc8d2SHong Zhang       }
7121c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7130b8dc8d2SHong Zhang       *norm = 0.0;
714d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7150b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7160b8dc8d2SHong Zhang       }
7179566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
718f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
719a30f8f8cSSatish Balay   }
7203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
721a30f8f8cSSatish Balay }
722a30f8f8cSSatish Balay 
723d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
724d71ae5a4SJacob Faibussowitsch {
725a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7261302d50aSBarry Smith   PetscInt      nstash, reallocs;
727a30f8f8cSSatish Balay 
728a30f8f8cSSatish Balay   PetscFunctionBegin;
7293ba16761SJacob Faibussowitsch   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
730a30f8f8cSSatish Balay 
7319566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7329566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7339566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7349566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7359566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7369566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
738a30f8f8cSSatish Balay }
739a30f8f8cSSatish Balay 
740d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
741d71ae5a4SJacob Faibussowitsch {
742a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
743a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
74413f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
745e44c0bd4SBarry Smith   PetscInt     *row, *col;
746ace3abfcSBarry Smith   PetscBool     other_disassembled;
74713f74950SBarry Smith   PetscMPIInt   n;
748ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
749a30f8f8cSSatish Balay   MatScalar    *val;
750a30f8f8cSSatish Balay 
75191c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
752a30f8f8cSSatish Balay   PetscFunctionBegin;
7534cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
754a30f8f8cSSatish Balay     while (1) {
7559566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
756a30f8f8cSSatish Balay       if (!flg) break;
757a30f8f8cSSatish Balay 
758a30f8f8cSSatish Balay       for (i = 0; i < n;) {
759a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
76026fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
76126fbe8dcSKarl Rupp           if (row[j] != rstart) break;
76226fbe8dcSKarl Rupp         }
763a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
764a30f8f8cSSatish Balay         else ncols = n - i;
765a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
7669566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
767a30f8f8cSSatish Balay         i = j;
768a30f8f8cSSatish Balay       }
769a30f8f8cSSatish Balay     }
7709566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
771a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
772a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
773a30f8f8cSSatish Balay        restore the original flags */
774a30f8f8cSSatish Balay     r1 = baij->roworiented;
775a30f8f8cSSatish Balay     r2 = a->roworiented;
77691c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
77726fbe8dcSKarl Rupp 
778a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
779a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
78026fbe8dcSKarl Rupp 
78191c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
782a30f8f8cSSatish Balay     while (1) {
7839566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
784a30f8f8cSSatish Balay       if (!flg) break;
785a30f8f8cSSatish Balay 
786a30f8f8cSSatish Balay       for (i = 0; i < n;) {
787a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
78826fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
78926fbe8dcSKarl Rupp           if (row[j] != rstart) break;
79026fbe8dcSKarl Rupp         }
791a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
792a30f8f8cSSatish Balay         else ncols = n - i;
7939566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
794a30f8f8cSSatish Balay         i = j;
795a30f8f8cSSatish Balay       }
796a30f8f8cSSatish Balay     }
7979566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
79826fbe8dcSKarl Rupp 
799a30f8f8cSSatish Balay     baij->roworiented = r1;
800a30f8f8cSSatish Balay     a->roworiented    = r2;
80126fbe8dcSKarl Rupp 
80291c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
803a30f8f8cSSatish Balay   }
804a30f8f8cSSatish Balay 
8059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
807a30f8f8cSSatish Balay 
808a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8096aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
810a30f8f8cSSatish Balay   /*
811a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
812a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
813a30f8f8cSSatish Balay   */
814a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8155f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
81648a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
817a30f8f8cSSatish Balay   }
818a30f8f8cSSatish Balay 
8199371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
822a30f8f8cSSatish Balay 
8239566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
82426fbe8dcSKarl Rupp 
825f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8264f9cfa9eSBarry Smith 
8274f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8284f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
829e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8301c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
831e56f5c9eSBarry Smith   }
8323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
833a30f8f8cSSatish Balay }
834a30f8f8cSSatish Balay 
835dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8369804daf3SBarry Smith #include <petscdraw.h>
837d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
838d71ae5a4SJacob Faibussowitsch {
839a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
840d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
8417da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
842ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
843b0a32e0cSBarry Smith   PetscViewer       sviewer;
844f3ef73ceSBarry Smith   PetscViewerFormat format;
845a30f8f8cSSatish Balay 
846a30f8f8cSSatish Balay   PetscFunctionBegin;
8479566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
84932077d6dSBarry Smith   if (iascii) {
8509566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
851456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
852a30f8f8cSSatish Balay       MatInfo info;
8539566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
8549566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
8559566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
8569371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
8579371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
8589566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
8599566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8609566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
8619566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8629566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
8639566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
8649566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
8659566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
8663ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
867fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
8689566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
8693ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
870c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
8713ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
872a30f8f8cSSatish Balay     }
873a30f8f8cSSatish Balay   }
874a30f8f8cSSatish Balay 
875a30f8f8cSSatish Balay   if (isdraw) {
876b0a32e0cSBarry Smith     PetscDraw draw;
877ace3abfcSBarry Smith     PetscBool isnull;
8789566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
8799566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
8803ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
881a30f8f8cSSatish Balay   }
882a30f8f8cSSatish Balay 
8837da1fb6eSBarry Smith   {
884a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
885a30f8f8cSSatish Balay     Mat           A;
88665d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
88765d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
888d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
889a30f8f8cSSatish Balay     MatScalar    *a;
8903e219373SBarry Smith     const char   *matname;
891a30f8f8cSSatish Balay 
892f204ca49SKris Buschelman     /* Should this be the same type as mat? */
8939566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
894dd400576SPatrick Sanan     if (rank == 0) {
8959566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
896a30f8f8cSSatish Balay     } else {
8979566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
898a30f8f8cSSatish Balay     }
8999566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9009566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9019566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
902a30f8f8cSSatish Balay 
903a30f8f8cSSatish Balay     /* copy over the A part */
90465d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9059371c9d4SSatish Balay     ai   = Aloc->i;
9069371c9d4SSatish Balay     aj   = Aloc->j;
9079371c9d4SSatish Balay     a    = Aloc->a;
9089566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
909a30f8f8cSSatish Balay 
910a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
911e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
91226fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
913a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
914e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
915a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9169566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
91726fbe8dcSKarl Rupp           col++;
91826fbe8dcSKarl Rupp           a += bs;
919a30f8f8cSSatish Balay         }
920a30f8f8cSSatish Balay       }
921a30f8f8cSSatish Balay     }
922a30f8f8cSSatish Balay     /* copy over the B part */
92365d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9249371c9d4SSatish Balay     ai   = Bloc->i;
9259371c9d4SSatish Balay     aj   = Bloc->j;
9269371c9d4SSatish Balay     a    = Bloc->a;
927a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
928e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
92926fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
930a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
931a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
932a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9339566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
93426fbe8dcSKarl Rupp           col++;
93526fbe8dcSKarl Rupp           a += bs;
936a30f8f8cSSatish Balay         }
937a30f8f8cSSatish Balay       }
938a30f8f8cSSatish Balay     }
9399566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
9409566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
9419566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
942a30f8f8cSSatish Balay     /*
943a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
944b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
945a30f8f8cSSatish Balay     */
9469566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
947*23a3927dSBarry Smith     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
948dd400576SPatrick Sanan     if (rank == 0) {
949*23a3927dSBarry Smith       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
9509566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
951a30f8f8cSSatish Balay     }
9529566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
9539566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
9549566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
955a30f8f8cSSatish Balay   }
9563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
957a30f8f8cSSatish Balay }
958a30f8f8cSSatish Balay 
959618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
960618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
961d1654148SHong Zhang 
962d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
963d71ae5a4SJacob Faibussowitsch {
964ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
965a30f8f8cSSatish Balay 
966a30f8f8cSSatish Balay   PetscFunctionBegin;
9679566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9689566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
9699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
9709566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
971d1654148SHong Zhang   if (iascii || isdraw || issocket) {
9729566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
9731baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
9743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
975a30f8f8cSSatish Balay }
976a30f8f8cSSatish Balay 
977d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
978d71ae5a4SJacob Faibussowitsch {
979a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
980a30f8f8cSSatish Balay 
981a30f8f8cSSatish Balay   PetscFunctionBegin;
982a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
9833ba16761SJacob Faibussowitsch   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
984a30f8f8cSSatish Balay #endif
9859566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
9869566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->bstash));
9879566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->A));
9889566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->B));
989a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
990eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&baij->colmap));
991a30f8f8cSSatish Balay #else
9929566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->colmap));
993a30f8f8cSSatish Balay #endif
9949566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
9959566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
9969566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
9979566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
9989566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
9999566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
10009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
10019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
10029566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->sMvctx));
10039566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->barray));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->hd));
10069566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->diag));
10079566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->bb1));
10089566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->xx1));
1009ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
10109566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->setvaluescopy));
1011a30f8f8cSSatish Balay #endif
10129566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->in_loc));
10139566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->v_loc));
10149566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->rangebs));
10159566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1016901853e0SKris Buschelman 
10179566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
10189566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
10199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
10209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
10226214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
10239566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
10246214f412SHong Zhang #endif
1025d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
10269566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
1027d24d4204SJose E. Roman #endif
10289566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
10299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
10303ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1031a30f8f8cSSatish Balay }
1032a30f8f8cSSatish Balay 
1033d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1034d71ae5a4SJacob Faibussowitsch {
1035547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1036eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10376de40e93SBarry Smith   PetscScalar       *from;
10386de40e93SBarry Smith   const PetscScalar *x;
1039547795f9SHong Zhang 
1040547795f9SHong Zhang   PetscFunctionBegin;
1041547795f9SHong Zhang   /* diagonal part */
10429566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10439566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1044547795f9SHong Zhang 
1045547795f9SHong Zhang   /* subdiagonal part */
10465f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10479566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1048547795f9SHong Zhang 
1049547795f9SHong Zhang   /* copy x into the vec slvec0 */
10509566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10519566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1052547795f9SHong Zhang 
10539566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1056547795f9SHong Zhang 
10579566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10589566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1059547795f9SHong Zhang   /* supperdiagonal part */
10609566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1062547795f9SHong Zhang }
1063547795f9SHong Zhang 
1064d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1065d71ae5a4SJacob Faibussowitsch {
1066a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1067eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1068d9ca1df4SBarry Smith   PetscScalar       *from;
1069d9ca1df4SBarry Smith   const PetscScalar *x;
1070a9d4b620SHong Zhang 
1071a9d4b620SHong Zhang   PetscFunctionBegin;
1072a9d4b620SHong Zhang   /* diagonal part */
10739566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10749566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1075a9d4b620SHong Zhang 
1076a9d4b620SHong Zhang   /* subdiagonal part */
10779566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1078fc165ae2SBarry Smith 
1079a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10819566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1082a9d4b620SHong Zhang 
10839566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1086fc165ae2SBarry Smith 
10879566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10889566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1089a9d4b620SHong Zhang   /* supperdiagonal part */
10909566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10913ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1092a9d4b620SHong Zhang }
1093a9d4b620SHong Zhang 
1094d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1095d71ae5a4SJacob Faibussowitsch {
1096eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1097eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1098eb1ec7c1SStefano Zampini   PetscScalar       *from, zero       = 0.0;
1099eb1ec7c1SStefano Zampini   const PetscScalar *x;
1100eb1ec7c1SStefano Zampini 
1101eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1102eb1ec7c1SStefano Zampini   /* diagonal part */
11039566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11049566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1105eb1ec7c1SStefano Zampini 
1106eb1ec7c1SStefano Zampini   /* subdiagonal part */
11075f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11089566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1109eb1ec7c1SStefano Zampini 
1110eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11119566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11129566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11139566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11149566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1115eb1ec7c1SStefano Zampini 
11169566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11179566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11189566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1119eb1ec7c1SStefano Zampini 
1120eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11219566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1123eb1ec7c1SStefano Zampini }
1124eb1ec7c1SStefano Zampini 
1125d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1126d71ae5a4SJacob Faibussowitsch {
1127de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1128d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1129d9ca1df4SBarry Smith   PetscScalar       *from, zero       = 0.0;
1130d9ca1df4SBarry Smith   const PetscScalar *x;
1131a9d4b620SHong Zhang 
1132a9d4b620SHong Zhang   PetscFunctionBegin;
1133a9d4b620SHong Zhang   /* diagonal part */
11349566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11359566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1136a9d4b620SHong Zhang 
1137a9d4b620SHong Zhang   /* subdiagonal part */
11389566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1139a9d4b620SHong Zhang 
1140a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11419566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11429566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11439566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1145a9d4b620SHong Zhang 
11469566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11479566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11489566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1149a9d4b620SHong Zhang 
1150a9d4b620SHong Zhang   /* supperdiagonal part */
11519566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11523ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1153a9d4b620SHong Zhang }
1154a9d4b620SHong Zhang 
1155a30f8f8cSSatish Balay /*
1156a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1157a30f8f8cSSatish Balay    diagonal block
1158a30f8f8cSSatish Balay */
1159d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1160d71ae5a4SJacob Faibussowitsch {
1161a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1162a30f8f8cSSatish Balay 
1163a30f8f8cSSatish Balay   PetscFunctionBegin;
116408401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11659566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
11663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1167a30f8f8cSSatish Balay }
1168a30f8f8cSSatish Balay 
1169d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1170d71ae5a4SJacob Faibussowitsch {
1171a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1172a30f8f8cSSatish Balay 
1173a30f8f8cSSatish Balay   PetscFunctionBegin;
11749566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11759566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
11763ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1177a30f8f8cSSatish Balay }
1178a30f8f8cSSatish Balay 
1179d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1180d71ae5a4SJacob Faibussowitsch {
1181d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1182d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1183d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1184d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1185899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1186d0d4cfc2SHong Zhang 
1187a30f8f8cSSatish Balay   PetscFunctionBegin;
11885f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1189d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1190d0d4cfc2SHong Zhang 
1191d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1192d0d4cfc2SHong Zhang     /*
1193d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1194d0d4cfc2SHong Zhang     */
1195d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1196d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1197d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1198d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1199d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
120026fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1201d0d4cfc2SHong Zhang     }
12029566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1203d0d4cfc2SHong Zhang   }
1204d0d4cfc2SHong Zhang 
12055f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1206d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1207d0d4cfc2SHong Zhang 
12089371c9d4SSatish Balay   pvA = &vworkA;
12099371c9d4SSatish Balay   pcA = &cworkA;
12109371c9d4SSatish Balay   pvB = &vworkB;
12119371c9d4SSatish Balay   pcB = &cworkB;
12129371c9d4SSatish Balay   if (!v) {
12139371c9d4SSatish Balay     pvA = NULL;
12149371c9d4SSatish Balay     pvB = NULL;
12159371c9d4SSatish Balay   }
12169371c9d4SSatish Balay   if (!idx) {
12179371c9d4SSatish Balay     pcA = NULL;
12189371c9d4SSatish Balay     if (!v) pcB = NULL;
12199371c9d4SSatish Balay   }
12209566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12219566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1222d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1223d0d4cfc2SHong Zhang 
1224d0d4cfc2SHong Zhang   cmap = mat->garray;
1225d0d4cfc2SHong Zhang   if (v || idx) {
1226d0d4cfc2SHong Zhang     if (nztot) {
1227d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1228d0d4cfc2SHong Zhang       PetscInt imark = -1;
1229d0d4cfc2SHong Zhang       if (v) {
1230d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1231d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1232d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1233d0d4cfc2SHong Zhang           else break;
1234d0d4cfc2SHong Zhang         }
1235d0d4cfc2SHong Zhang         imark = i;
1236d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1237d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1238d0d4cfc2SHong Zhang       }
1239d0d4cfc2SHong Zhang       if (idx) {
1240d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1241d0d4cfc2SHong Zhang         if (imark > -1) {
1242ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1243d0d4cfc2SHong Zhang         } else {
1244d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
124526fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1246d0d4cfc2SHong Zhang             else break;
1247d0d4cfc2SHong Zhang           }
1248d0d4cfc2SHong Zhang           imark = i;
1249d0d4cfc2SHong Zhang         }
1250d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1251d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1252d0d4cfc2SHong Zhang       }
1253d0d4cfc2SHong Zhang     } else {
1254f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1255f4259b30SLisandro Dalcin       if (v) *v = NULL;
1256d0d4cfc2SHong Zhang     }
1257d0d4cfc2SHong Zhang   }
1258d0d4cfc2SHong Zhang   *nz = nztot;
12599566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12609566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
12613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1262a30f8f8cSSatish Balay }
1263a30f8f8cSSatish Balay 
1264d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1265d71ae5a4SJacob Faibussowitsch {
1266a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1267a30f8f8cSSatish Balay 
1268a30f8f8cSSatish Balay   PetscFunctionBegin;
12695f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1270a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
12713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1272a30f8f8cSSatish Balay }
1273a30f8f8cSSatish Balay 
1274d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1275d71ae5a4SJacob Faibussowitsch {
1276d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1277d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1278d0d4cfc2SHong Zhang 
1279d0d4cfc2SHong Zhang   PetscFunctionBegin;
1280d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
12813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1282d0d4cfc2SHong Zhang }
1283d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1284d71ae5a4SJacob Faibussowitsch {
1285d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1286d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1287d0d4cfc2SHong Zhang 
1288d0d4cfc2SHong Zhang   PetscFunctionBegin;
1289d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
12903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1291d0d4cfc2SHong Zhang }
1292d0d4cfc2SHong Zhang 
1293d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1294d71ae5a4SJacob Faibussowitsch {
12955f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
12965f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
12972726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
12982726fb6dSPierre Jolivet 
12999566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13009566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13015f80ce2aSJacob Faibussowitsch   }
13023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13032726fb6dSPierre Jolivet }
13042726fb6dSPierre Jolivet 
1305d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1306d71ae5a4SJacob Faibussowitsch {
130799cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
130899cafbc1SBarry Smith 
130999cafbc1SBarry Smith   PetscFunctionBegin;
13109566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13119566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
13123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
131399cafbc1SBarry Smith }
131499cafbc1SBarry Smith 
1315d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1316d71ae5a4SJacob Faibussowitsch {
131799cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
131899cafbc1SBarry Smith 
131999cafbc1SBarry Smith   PetscFunctionBegin;
13209566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13219566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
13223ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132399cafbc1SBarry Smith }
132499cafbc1SBarry Smith 
13257dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
132636032a97SHong Zhang    Input: isrow       - distributed(parallel),
132736032a97SHong Zhang           iscol_local - locally owned (seq)
132836032a97SHong Zhang */
1329d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1330d71ae5a4SJacob Faibussowitsch {
13318f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13328f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
133336032a97SHong Zhang 
133436032a97SHong Zhang   PetscFunctionBegin;
13359566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13369566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13371098a8e8SHong Zhang   if (sz1 > sz2) {
13381098a8e8SHong Zhang     *flg = PETSC_FALSE;
13393ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
13401098a8e8SHong Zhang   }
13418f46ffcaSHong Zhang 
13429566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13448f46ffcaSHong Zhang 
13459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13479566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13489566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13499566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13509566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13518f46ffcaSHong Zhang 
13528f46ffcaSHong Zhang   nmatch = 0;
13538f46ffcaSHong Zhang   k      = 0;
13548f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13558f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13568f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13579371c9d4SSatish Balay         k = j;
13589371c9d4SSatish Balay         nmatch++;
13598f46ffcaSHong Zhang         break;
13608f46ffcaSHong Zhang       }
13618f46ffcaSHong Zhang     }
13628f46ffcaSHong Zhang   }
13639566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13649566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13659566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13669566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13671098a8e8SHong Zhang   if (nmatch < sz1) {
13681098a8e8SHong Zhang     *flg = PETSC_FALSE;
13691098a8e8SHong Zhang   } else {
13701098a8e8SHong Zhang     *flg = PETSC_TRUE;
13711098a8e8SHong Zhang   }
13723ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13738f46ffcaSHong Zhang }
137436032a97SHong Zhang 
1375d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1376d71ae5a4SJacob Faibussowitsch {
137736032a97SHong Zhang   IS        iscol_local;
137836032a97SHong Zhang   PetscInt  csize;
137936032a97SHong Zhang   PetscBool isequal;
138036032a97SHong Zhang 
138136032a97SHong Zhang   PetscFunctionBegin;
13829566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
138336032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13849566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13855f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
138636032a97SHong Zhang   } else {
1387068661f9SToby Isaac     PetscBool issorted;
1388068661f9SToby Isaac 
13899566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
13909566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
13919566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
13925f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
13938f46ffcaSHong Zhang   }
13948f46ffcaSHong Zhang 
13957dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
13969566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
13978f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
13989566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
13999566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14008f46ffcaSHong Zhang   }
14013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14028f46ffcaSHong Zhang }
14038f46ffcaSHong Zhang 
1404d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1405d71ae5a4SJacob Faibussowitsch {
1406a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1407a30f8f8cSSatish Balay 
1408a30f8f8cSSatish Balay   PetscFunctionBegin;
14099566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14109566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
14113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1412a30f8f8cSSatish Balay }
1413a30f8f8cSSatish Balay 
1414d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1415d71ae5a4SJacob Faibussowitsch {
1416a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1417a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14183966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1419a30f8f8cSSatish Balay 
1420a30f8f8cSSatish Balay   PetscFunctionBegin;
1421d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
142226fbe8dcSKarl Rupp 
14239566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
142426fbe8dcSKarl Rupp 
14259371c9d4SSatish Balay   isend[0] = info->nz_used;
14269371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14279371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14289371c9d4SSatish Balay   isend[3] = info->memory;
14299371c9d4SSatish Balay   isend[4] = info->mallocs;
143026fbe8dcSKarl Rupp 
14319566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
143226fbe8dcSKarl Rupp 
14339371c9d4SSatish Balay   isend[0] += info->nz_used;
14349371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14359371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14369371c9d4SSatish Balay   isend[3] += info->memory;
14379371c9d4SSatish Balay   isend[4] += info->mallocs;
1438a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1439a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1440a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1441a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1442a30f8f8cSSatish Balay     info->memory       = isend[3];
1443a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1444a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14451c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
144626fbe8dcSKarl Rupp 
1447a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1448a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1449a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1450a30f8f8cSSatish Balay     info->memory       = irecv[3];
1451a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1452a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14531c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
145426fbe8dcSKarl Rupp 
1455a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1456a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1457a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1458a30f8f8cSSatish Balay     info->memory       = irecv[3];
1459a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
146098921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1461a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1462a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1463a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
14643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1465a30f8f8cSSatish Balay }
1466a30f8f8cSSatish Balay 
1467d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1468d71ae5a4SJacob Faibussowitsch {
1469a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1470d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1471a30f8f8cSSatish Balay 
1472a30f8f8cSSatish Balay   PetscFunctionBegin;
1473e98b92d7SKris Buschelman   switch (op) {
1474512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1475e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
147628b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1477a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1478c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1479e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
148043674050SBarry Smith     MatCheckPreallocated(A, 1);
14819566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14829566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1483e98b92d7SKris Buschelman     break;
1484e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
148543674050SBarry Smith     MatCheckPreallocated(A, 1);
14864e0d8c25SBarry Smith     a->roworiented = flg;
148726fbe8dcSKarl Rupp 
14889566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14899566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1490e98b92d7SKris Buschelman     break;
14918c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1492d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1493d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1494d71ae5a4SJacob Faibussowitsch     break;
1495d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1496d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1497d71ae5a4SJacob Faibussowitsch     break;
1498d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1499d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1500d71ae5a4SJacob Faibussowitsch     break;
1501d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1502d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1503d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15040f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1505eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1506547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1507eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1508eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1509eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1510b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1511eb1ec7c1SStefano Zampini     }
15120f2140c7SStefano Zampini #endif
1513eeffb40dSHong Zhang     break;
1514ffa07934SHong Zhang   case MAT_SPD:
1515d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1516d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1517d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1518eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1519eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1520eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1521eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1522eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1523eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1524eb1ec7c1SStefano Zampini     }
1525eb1ec7c1SStefano Zampini #endif
1526eeffb40dSHong Zhang     break;
152777e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
152843674050SBarry Smith     MatCheckPreallocated(A, 1);
15299566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1530eeffb40dSHong Zhang     break;
15319a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1532b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15335f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15349566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
153577e54ba9SKris Buschelman     break;
1536d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1537d71ae5a4SJacob Faibussowitsch     break;
1538d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1539d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1540d71ae5a4SJacob Faibussowitsch     break;
1541d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1542d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1543d71ae5a4SJacob Faibussowitsch     break;
1544d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1545d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1546d71ae5a4SJacob Faibussowitsch     break;
1547d71ae5a4SJacob Faibussowitsch   default:
1548d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1549a30f8f8cSSatish Balay   }
15503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1551a30f8f8cSSatish Balay }
1552a30f8f8cSSatish Balay 
1553d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1554d71ae5a4SJacob Faibussowitsch {
1555a30f8f8cSSatish Balay   PetscFunctionBegin;
15567fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1557cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15589566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1559cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15609566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1561fc4dec0aSBarry Smith   }
15623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1563a30f8f8cSSatish Balay }
1564a30f8f8cSSatish Balay 
1565d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1566d71ae5a4SJacob Faibussowitsch {
1567a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1568a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15695e90f9d9SHong Zhang   PetscInt      nv, m, n;
1570ace3abfcSBarry Smith   PetscBool     flg;
1571a30f8f8cSSatish Balay 
1572a30f8f8cSSatish Balay   PetscFunctionBegin;
1573a30f8f8cSSatish Balay   if (ll != rr) {
15749566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15755f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1576a30f8f8cSSatish Balay   }
15773ba16761SJacob Faibussowitsch   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1578b3bf805bSHong Zhang 
15799566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15805f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1581b3bf805bSHong Zhang 
15829566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15835f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15845e90f9d9SHong Zhang 
15859566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15865e90f9d9SHong Zhang 
15875e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1588dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
15895e90f9d9SHong Zhang 
15905e90f9d9SHong Zhang   /* scale the diagonal part */
1591dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1592a30f8f8cSSatish Balay 
15935e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
15949566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1595dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
15963ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1597a30f8f8cSSatish Balay }
1598a30f8f8cSSatish Balay 
1599d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1600d71ae5a4SJacob Faibussowitsch {
1601f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1602a30f8f8cSSatish Balay 
1603a30f8f8cSSatish Balay   PetscFunctionBegin;
16049566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
16053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1606a30f8f8cSSatish Balay }
1607a30f8f8cSSatish Balay 
16086849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1609a30f8f8cSSatish Balay 
1610d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1611d71ae5a4SJacob Faibussowitsch {
1612a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1613a30f8f8cSSatish Balay   Mat           a, b, c, d;
1614ace3abfcSBarry Smith   PetscBool     flg;
1615a30f8f8cSSatish Balay 
1616a30f8f8cSSatish Balay   PetscFunctionBegin;
16179371c9d4SSatish Balay   a = matA->A;
16189371c9d4SSatish Balay   b = matA->B;
16199371c9d4SSatish Balay   c = matB->A;
16209371c9d4SSatish Balay   d = matB->B;
1621a30f8f8cSSatish Balay 
16229566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
162348a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16241c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
16253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1626a30f8f8cSSatish Balay }
1627a30f8f8cSSatish Balay 
1628d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1629d71ae5a4SJacob Faibussowitsch {
16304c7a3774SStefano Zampini   PetscBool isbaij;
16313c896bc6SHong Zhang 
16323c896bc6SHong Zhang   PetscFunctionBegin;
16339566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16345f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16353c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16363c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16379566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16389566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16399566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16403c896bc6SHong Zhang   } else {
16414c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16424c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16434c7a3774SStefano Zampini 
16449566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16459566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16463c896bc6SHong Zhang   }
16479566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16483ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16493c896bc6SHong Zhang }
16503c896bc6SHong Zhang 
1651d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1652d71ae5a4SJacob Faibussowitsch {
1653273d9f13SBarry Smith   PetscFunctionBegin;
16549566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
16553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1656273d9f13SBarry Smith }
1657a5e6ed63SBarry Smith 
1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659d71ae5a4SJacob Faibussowitsch {
16604fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16614fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16624fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16634fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16644fe895cdSHong Zhang 
16654fe895cdSHong Zhang   PetscFunctionBegin;
16664fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16674fe895cdSHong Zhang     PetscScalar alpha = a;
16684fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16694fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16724fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16734fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16749566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16769566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16789566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16799566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16809566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16814fe895cdSHong Zhang   } else {
16824de5dceeSHong Zhang     Mat       B;
16834de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16845f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16859566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16869566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16899566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16909566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16919566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16929566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16939566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
16949566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
16959566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
16969566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
16979566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
16989566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
16999566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17009566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17019566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17029566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17034fe895cdSHong Zhang   }
17043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17054fe895cdSHong Zhang }
17064fe895cdSHong Zhang 
1707d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708d71ae5a4SJacob Faibussowitsch {
17091302d50aSBarry Smith   PetscInt  i;
1710afebec48SHong Zhang   PetscBool flg;
1711a5e6ed63SBarry Smith 
17126849ba73SBarry Smith   PetscFunctionBegin;
17139566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17159566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
171648a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17174dcd73b1SHong Zhang   }
17183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1719a5e6ed63SBarry Smith }
1720a5e6ed63SBarry Smith 
1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722d71ae5a4SJacob Faibussowitsch {
17237d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17246f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17257d68702bSBarry Smith 
17267d68702bSBarry Smith   PetscFunctionBegin;
17276f33a894SBarry Smith   if (!Y->preallocated) {
17289566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17296f33a894SBarry Smith   } else if (!aij->nz) {
1730b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17319566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732b83222d8SBarry Smith     aij->nonew = nonew;
17337d68702bSBarry Smith   }
17349566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17367d68702bSBarry Smith }
17377d68702bSBarry Smith 
1738d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739d71ae5a4SJacob Faibussowitsch {
17403b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17413b49f96aSBarry Smith 
17423b49f96aSBarry Smith   PetscFunctionBegin;
17435f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17449566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17453b49f96aSBarry Smith   if (d) {
17463b49f96aSBarry Smith     PetscInt rstart;
17479566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17483b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17493b49f96aSBarry Smith   }
17503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17513b49f96aSBarry Smith }
17523b49f96aSBarry Smith 
1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754d71ae5a4SJacob Faibussowitsch {
1755a5b7ff6bSBarry Smith   PetscFunctionBegin;
1756a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
17573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1758a5b7ff6bSBarry Smith }
17593b49f96aSBarry Smith 
1760a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
17613964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1762a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1763a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1764a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
176597304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1766431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1767431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1768f4259b30SLisandro Dalcin                                        NULL,
1769f4259b30SLisandro Dalcin                                        NULL,
1770f4259b30SLisandro Dalcin                                        NULL,
1771f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1772f4259b30SLisandro Dalcin                                        NULL,
1773f4259b30SLisandro Dalcin                                        NULL,
177441f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1775a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
177697304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1777a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1778a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1779a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1780a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
178197304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1782a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1783a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1784a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1785f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1786f4259b30SLisandro Dalcin                                        NULL,
1787f4259b30SLisandro Dalcin                                        NULL,
1788f4259b30SLisandro Dalcin                                        NULL,
1789f4259b30SLisandro Dalcin                                        NULL,
17904994cf47SJed Brown                                        /* 29*/ MatSetUp_MPISBAIJ,
1791f4259b30SLisandro Dalcin                                        NULL,
1792f4259b30SLisandro Dalcin                                        NULL,
1793a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1794f4259b30SLisandro Dalcin                                        NULL,
1795d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1796f4259b30SLisandro Dalcin                                        NULL,
1797f4259b30SLisandro Dalcin                                        NULL,
1798f4259b30SLisandro Dalcin                                        NULL,
1799f4259b30SLisandro Dalcin                                        NULL,
1800d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18017dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1802d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1803a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18043c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1805f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1806a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18077d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1808f4259b30SLisandro Dalcin                                        NULL,
1809f4259b30SLisandro Dalcin                                        NULL,
1810f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1811f4259b30SLisandro Dalcin                                        NULL,
1812f4259b30SLisandro Dalcin                                        NULL,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        NULL,
1815f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1816f4259b30SLisandro Dalcin                                        NULL,
1817a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1818f4259b30SLisandro Dalcin                                        NULL,
1819a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18207dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1821f4259b30SLisandro Dalcin                                        NULL,
1822f4259b30SLisandro Dalcin                                        NULL,
1823f4259b30SLisandro Dalcin                                        NULL,
1824f4259b30SLisandro Dalcin                                        NULL,
1825f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1826f4259b30SLisandro Dalcin                                        NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829f4259b30SLisandro Dalcin                                        NULL,
1830d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1831f4259b30SLisandro Dalcin                                        NULL,
183228d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1833f4259b30SLisandro Dalcin                                        NULL,
1834f4259b30SLisandro Dalcin                                        NULL,
1835f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1836f4259b30SLisandro Dalcin                                        NULL,
1837f4259b30SLisandro Dalcin                                        NULL,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        NULL,
1840f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1841f4259b30SLisandro Dalcin                                        NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
1843f4259b30SLisandro Dalcin                                        NULL,
18445bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1845f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1846f4259b30SLisandro Dalcin                                        NULL,
1847f4259b30SLisandro Dalcin                                        NULL,
1848f4259b30SLisandro Dalcin                                        NULL,
1849f4259b30SLisandro Dalcin                                        NULL,
1850f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1851f4259b30SLisandro Dalcin                                        NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        NULL,
1855f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1856f4259b30SLisandro Dalcin                                        NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
1859f4259b30SLisandro Dalcin                                        NULL,
1860f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1861f4259b30SLisandro Dalcin                                        NULL,
1862f4259b30SLisandro Dalcin                                        NULL,
18632726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1864f4259b30SLisandro Dalcin                                        NULL,
1865f4259b30SLisandro Dalcin                                        /*104*/ NULL,
186699cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1867d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1868d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
186995936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1870f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1871f4259b30SLisandro Dalcin                                        NULL,
1872f4259b30SLisandro Dalcin                                        NULL,
1873f4259b30SLisandro Dalcin                                        NULL,
18743b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1875f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1876f4259b30SLisandro Dalcin                                        NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
1878f4259b30SLisandro Dalcin                                        NULL,
1879f4259b30SLisandro Dalcin                                        NULL,
1880f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1881f4259b30SLisandro Dalcin                                        NULL,
1882f4259b30SLisandro Dalcin                                        NULL,
1883f4259b30SLisandro Dalcin                                        NULL,
1884f4259b30SLisandro Dalcin                                        NULL,
1885f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1886f4259b30SLisandro Dalcin                                        NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
1889f4259b30SLisandro Dalcin                                        NULL,
1890f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1891f4259b30SLisandro Dalcin                                        NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        NULL,
1895f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1896f4259b30SLisandro Dalcin                                        NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
1899f4259b30SLisandro Dalcin                                        NULL,
190046533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1901f4259b30SLisandro Dalcin                                        NULL,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904f4259b30SLisandro Dalcin                                        NULL,
1905d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1906d70f29a3SPierre Jolivet                                        NULL,
1907d70f29a3SPierre Jolivet                                        NULL,
190899a7f59eSMark Adams                                        NULL,
190999a7f59eSMark Adams                                        NULL,
19107fb60732SBarry Smith                                        NULL,
1911dec0b466SHong Zhang                                        /*150*/ NULL,
1912dec0b466SHong Zhang                                        NULL};
1913a30f8f8cSSatish Balay 
1914d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915d71ae5a4SJacob Faibussowitsch {
1916476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19185d2a9ed1SStefano Zampini   PetscMPIInt   size;
1919a23d5eceSKris Buschelman 
1920a23d5eceSKris Buschelman   PetscFunctionBegin;
19219566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19255f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19265f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1927899cda47SBarry Smith 
1928d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1929d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19305f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1931a23d5eceSKris Buschelman 
1932d0f46423SBarry Smith   B->rmap->bs = bs;
1933a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1934a23d5eceSKris Buschelman   b->mbs      = mbs;
1935a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1936de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1937de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1938a23d5eceSKris Buschelman 
1939ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1940d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1941d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1942a23d5eceSKris Buschelman 
1943d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1944d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1945a23d5eceSKris Buschelman 
1946cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
1947eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
1948cb7b82ddSBarry Smith #else
19499566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1950cb7b82ddSBarry Smith #endif
19519566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19529566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19539566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19549566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19559566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19569566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19579566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19589566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19599566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1960cb7b82ddSBarry Smith 
1961cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
19629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19649566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19669566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1967cb7b82ddSBarry Smith 
1968526dfc15SBarry Smith   if (!B->preallocated) {
19699566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19709566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19719566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
19729566063dSJacob Faibussowitsch     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1973526dfc15SBarry Smith   }
1974a23d5eceSKris Buschelman 
19759566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19769566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
197726fbe8dcSKarl Rupp 
1978526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1979cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1980cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
19813ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1982a23d5eceSKris Buschelman }
1983a23d5eceSKris Buschelman 
1984d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1985d71ae5a4SJacob Faibussowitsch {
198602106b30SBarry Smith   PetscInt        m, rstart, cend;
1987f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1988f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
1989f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
1990bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
19913bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
1992dfb205c3SBarry Smith 
1993dfb205c3SBarry Smith   PetscFunctionBegin;
19945f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
19959566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
19969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
19979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19989566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2000dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2001dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2002dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2003dfb205c3SBarry Smith 
20045f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2006dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2007dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20085f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20090cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2010dfb205c3SBarry Smith     JJ = jj + ii[i];
20110cd7f59aSBarry Smith     bd = 0;
2012dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20130cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2014dfb205c3SBarry Smith       JJ++;
20150cd7f59aSBarry Smith       bd++;
2016dfb205c3SBarry Smith     }
2017dfb205c3SBarry Smith     d = 0;
2018dfb205c3SBarry Smith     for (; j < nz; j++) {
2019dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2020dfb205c3SBarry Smith       d++;
2021dfb205c3SBarry Smith     }
2022dfb205c3SBarry Smith     d_nnz[i] = d;
20230cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20240cd7f59aSBarry Smith     nz       = nz - bd;
20250cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2026dfb205c3SBarry Smith   }
20279566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20289566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20299566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2030dfb205c3SBarry Smith 
2031dfb205c3SBarry Smith   values = (PetscScalar *)V;
203248a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2033dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2034dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2035dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2036dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2037bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2038dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20399566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2040bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2041bb80cfbbSStefano Zampini       PetscInt j;
20420cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20430cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20449566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20450cd7f59aSBarry Smith       }
20460cd7f59aSBarry Smith     }
2047dfb205c3SBarry Smith   }
2048dfb205c3SBarry Smith 
20499566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20503bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20513bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20543bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20553bd0feecSPierre Jolivet 
20569566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
20573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2058dfb205c3SBarry Smith }
2059dfb205c3SBarry Smith 
20600bad9183SKris Buschelman /*MC
2061fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063828413b8SBarry Smith    the matrix is stored.
2064828413b8SBarry Smith 
2065828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
206611a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20670bad9183SKris Buschelman 
20680bad9183SKris Buschelman    Options Database Keys:
206911a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20700bad9183SKris Buschelman 
207111a5261eSBarry Smith    Note:
2072476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2073476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2074476417e5SBarry Smith 
20750bad9183SKris Buschelman    Level: beginner
20760bad9183SKris Buschelman 
207711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20780bad9183SKris Buschelman M*/
20790bad9183SKris Buschelman 
2080d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2081d71ae5a4SJacob Faibussowitsch {
2082b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
208394ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2084b5df2d14SHong Zhang 
2085b5df2d14SHong Zhang   PetscFunctionBegin;
20864dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2087b0a32e0cSBarry Smith   B->data = (void *)b;
20889566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2089b5df2d14SHong Zhang 
2090b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2091b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2092b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2093b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
209426fbe8dcSKarl Rupp 
20959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
20969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2097b5df2d14SHong Zhang 
2098b5df2d14SHong Zhang   /* build local table of row and column ownerships */
20999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2100b5df2d14SHong Zhang 
2101b5df2d14SHong Zhang   /* build cache for off array entries formed */
21029566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
210326fbe8dcSKarl Rupp 
2104b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21050298fd71SBarry Smith   b->colmap      = NULL;
21060298fd71SBarry Smith   b->garray      = NULL;
2107b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2108b5df2d14SHong Zhang 
2109b5df2d14SHong Zhang   /* stuff used in block assembly */
2110f4259b30SLisandro Dalcin   b->barray = NULL;
2111b5df2d14SHong Zhang 
2112b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2113f4259b30SLisandro Dalcin   b->lvec    = NULL;
2114f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2115f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2116f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2117f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2118f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2119f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2120f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2121b5df2d14SHong Zhang 
2122b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2123f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2124f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2125b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2126b5df2d14SHong Zhang 
2127b5df2d14SHong Zhang   /* hash table stuff */
2128f4259b30SLisandro Dalcin   b->ht           = NULL;
2129f4259b30SLisandro Dalcin   b->hd           = NULL;
2130b5df2d14SHong Zhang   b->ht_size      = 0;
2131b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2132b5df2d14SHong Zhang   b->ht_fact      = 0;
2133b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2134b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2135b5df2d14SHong Zhang 
21367dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21377a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21387a868f3eSHong Zhang 
2139f4259b30SLisandro Dalcin   b->in_loc = NULL;
2140f4259b30SLisandro Dalcin   b->v_loc  = NULL;
214159ffdab8SBarry Smith   b->n_loc  = 0;
214294ae4db5SBarry Smith 
21439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21476214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21496214f412SHong Zhang #endif
2150d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2152d24d4204SJose E. Roman #endif
21539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2155aa5a9175SDahai Guo 
2156b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2157b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2158b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2159b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2160eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2161b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2162eb1ec7c1SStefano Zampini #else
2163b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2164eb1ec7c1SStefano Zampini #endif
216513647f61SHong Zhang 
21669566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2167d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21689566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
216994ae4db5SBarry Smith   if (flg) {
217094ae4db5SBarry Smith     PetscReal fact = 1.39;
21719566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21729566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
217394ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21749566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
217694ae4db5SBarry Smith   }
2177d0609cedSBarry Smith   PetscOptionsEnd();
21783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2179b5df2d14SHong Zhang }
2180b5df2d14SHong Zhang 
2181209238afSKris Buschelman /*MC
2182002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2183209238afSKris Buschelman 
218411a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
218511a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2186209238afSKris Buschelman 
218711a5261eSBarry Smith    Options Database Key:
2188c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2189209238afSKris Buschelman 
2190209238afSKris Buschelman   Level: beginner
2191209238afSKris Buschelman 
2192c5dec841SPierre Jolivet .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2193209238afSKris Buschelman M*/
2194209238afSKris Buschelman 
2195b5df2d14SHong Zhang /*@C
2196b5df2d14SHong Zhang    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2197b5df2d14SHong Zhang    the user should preallocate the matrix storage by setting the parameters
2198b5df2d14SHong Zhang    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199b5df2d14SHong Zhang    performance can be increased by more than a factor of 50.
2200b5df2d14SHong Zhang 
2201c3339decSBarry Smith    Collective
2202b5df2d14SHong Zhang 
2203b5df2d14SHong Zhang    Input Parameters:
22041c4f3114SJed Brown +  B - the matrix
2205bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2206bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2207b5df2d14SHong Zhang .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2208b5df2d14SHong Zhang            submatrix  (same for all local rows)
2209b5df2d14SHong Zhang .  d_nnz - array containing the number of block nonzeros in the various block rows
22106d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22110298fd71SBarry Smith            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
221295742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2213b5df2d14SHong Zhang .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2214b5df2d14SHong Zhang            submatrix (same for all local rows).
2215b5df2d14SHong Zhang -  o_nnz - array containing the number of nonzeros in the various block rows of the
2216c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22170298fd71SBarry Smith            (possibly different for each block row) or NULL.
2218b5df2d14SHong Zhang 
2219b5df2d14SHong Zhang    Options Database Keys:
2220a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2221b5df2d14SHong Zhang                      block calculations (much slower)
2222a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
2223b5df2d14SHong Zhang 
2224b5df2d14SHong Zhang    Notes:
2225b5df2d14SHong Zhang 
222611a5261eSBarry Smith    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2227b5df2d14SHong Zhang    than it must be used on all processors that share the object for that argument.
2228b5df2d14SHong Zhang 
222949a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
223049a6f317SBarry Smith 
2231b5df2d14SHong Zhang    Storage Information:
2232b5df2d14SHong Zhang    For a square global matrix we define each processor's diagonal portion
2233b5df2d14SHong Zhang    to be its local rows and the corresponding columns (a square submatrix);
2234b5df2d14SHong Zhang    each processor's off-diagonal portion encompasses the remainder of the
2235b5df2d14SHong Zhang    local matrix (a rectangular submatrix).
2236b5df2d14SHong Zhang 
2237b5df2d14SHong Zhang    The user can specify preallocated storage for the diagonal part of
2238b5df2d14SHong Zhang    the local submatrix with either d_nz or d_nnz (not both).  Set
223911a5261eSBarry Smith    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2240b5df2d14SHong Zhang    memory allocation.  Likewise, specify preallocated storage for the
2241b5df2d14SHong Zhang    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2242b5df2d14SHong Zhang 
224311a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2244aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2245aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2246aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2247aa95bbe8SBarry Smith 
2248b5df2d14SHong Zhang    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2249b5df2d14SHong Zhang    the figure below we depict these three local rows and all columns (0-11).
2250b5df2d14SHong Zhang 
2251b5df2d14SHong Zhang .vb
2252b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2253a4b1a0f6SJed Brown           --------------------------
2254c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2255c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2256c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2257a4b1a0f6SJed Brown           --------------------------
2258b5df2d14SHong Zhang .ve
2259b5df2d14SHong Zhang 
2260b5df2d14SHong Zhang    Thus, any entries in the d locations are stored in the d (diagonal)
2261b5df2d14SHong Zhang    submatrix, and any entries in the o locations are stored in the
22626d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
226311a5261eSBarry Smith    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2264b5df2d14SHong Zhang 
22656d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
22666d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2267c2fc9fa9SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix
2268c2fc9fa9SBarry Smith 
2269b5df2d14SHong Zhang    In general, for PDE problems in which most nonzeros are near the diagonal,
2270b5df2d14SHong Zhang    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2271b5df2d14SHong Zhang    or you will get TERRIBLE performance; see the users' manual chapter on
2272b5df2d14SHong Zhang    matrices.
2273b5df2d14SHong Zhang 
2274b5df2d14SHong Zhang    Level: intermediate
2275b5df2d14SHong Zhang 
227611a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2277b5df2d14SHong Zhang @*/
2278d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2279d71ae5a4SJacob Faibussowitsch {
2280b5df2d14SHong Zhang   PetscFunctionBegin;
22816ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22826ba663aaSJed Brown   PetscValidType(B, 1);
22836ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2284cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
22853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2286b5df2d14SHong Zhang }
2287b5df2d14SHong Zhang 
2288a30f8f8cSSatish Balay /*@C
228911a5261eSBarry Smith    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2290a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2291a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2292a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2294a30f8f8cSSatish Balay 
2295d083f849SBarry Smith    Collective
2296a30f8f8cSSatish Balay 
2297a30f8f8cSSatish Balay    Input Parameters:
2298a30f8f8cSSatish Balay +  comm - MPI communicator
229911a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2300bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
230111a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2303a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
230411a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2305a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2306a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
230711a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
230811a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2309a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2310a30f8f8cSSatish Balay            submatrix (same for all local rows)
2311a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
23126d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23130298fd71SBarry Smith            (possibly different for each block block row) or NULL.
231495742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
231595742e49SBarry Smith            set its value even if it is zero.
2316a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2317a30f8f8cSSatish Balay            submatrix (same for all local rows).
2318a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2319a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23200298fd71SBarry Smith            each block row) or NULL.
2321a30f8f8cSSatish Balay 
2322a30f8f8cSSatish Balay    Output Parameter:
2323a30f8f8cSSatish Balay .  A - the matrix
2324a30f8f8cSSatish Balay 
2325a30f8f8cSSatish Balay    Options Database Keys:
2326a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2327a30f8f8cSSatish Balay                      block calculations (much slower)
2328a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2329a2b725a8SWilliam Gropp -   -mat_mpi - use the parallel matrix data structures even on one processor
2330a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2331a30f8f8cSSatish Balay 
233211a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2333f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
233411a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2335175b88e8SBarry Smith 
2336a30f8f8cSSatish Balay    Notes:
2337d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
23386d6d819aSHong Zhang    This matrix type does not support complex Hermitian operation.
2339d1be2dadSMatthew Knepley 
2340a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2341a30f8f8cSSatish Balay    (possibly both).
2342a30f8f8cSSatish Balay 
234311a5261eSBarry Smith    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2345a30f8f8cSSatish Balay 
234649a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
234749a6f317SBarry Smith 
2348a30f8f8cSSatish Balay    Storage Information:
2349a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2350a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2351a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2352a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2353a30f8f8cSSatish Balay 
2354a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2355a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both). Set
23560298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2357a30f8f8cSSatish Balay    memory allocation. Likewise, specify preallocated storage for the
2358a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2359a30f8f8cSSatish Balay 
2360a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2362a30f8f8cSSatish Balay 
2363a30f8f8cSSatish Balay .vb
2364a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2365a4b1a0f6SJed Brown           --------------------------
2366c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2367c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2368c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2369a4b1a0f6SJed Brown           --------------------------
2370a30f8f8cSSatish Balay .ve
2371a30f8f8cSSatish Balay 
2372a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2373a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
23746d10fdaeSSatish Balay    o (off-diagonal) submatrix. Note that the d matrix is stored in
237511a5261eSBarry Smith    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2376a30f8f8cSSatish Balay 
23776d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
23786d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2379a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2381a30f8f8cSSatish Balay    one expects d_nz >> o_nz. For large problems you MUST preallocate memory
2382a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2383a30f8f8cSSatish Balay    matrices.
2384a30f8f8cSSatish Balay 
2385a30f8f8cSSatish Balay    Level: intermediate
2386a30f8f8cSSatish Balay 
238711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2388a30f8f8cSSatish Balay @*/
2389a30f8f8cSSatish Balay 
2390d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2391d71ae5a4SJacob Faibussowitsch {
23921302d50aSBarry Smith   PetscMPIInt size;
2393a30f8f8cSSatish Balay 
2394a30f8f8cSSatish Balay   PetscFunctionBegin;
23959566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23969566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2398273d9f13SBarry Smith   if (size > 1) {
23999566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
24009566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2401273d9f13SBarry Smith   } else {
24029566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
24039566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2404273d9f13SBarry Smith   }
24053ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2406a30f8f8cSSatish Balay }
2407a30f8f8cSSatish Balay 
2408d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2409d71ae5a4SJacob Faibussowitsch {
2410a30f8f8cSSatish Balay   Mat           mat;
2411a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2412d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2413387bc808SHong Zhang   PetscScalar  *array;
2414a30f8f8cSSatish Balay 
2415a30f8f8cSSatish Balay   PetscFunctionBegin;
2416f4259b30SLisandro Dalcin   *newmat = NULL;
241726fbe8dcSKarl Rupp 
24189566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24199566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24209566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24219566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2423e1b6402fSHong Zhang 
2424d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2425273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
242682327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24277fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24287fff6886SHong Zhang 
2429b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2430a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2431a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2432a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2433a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2434a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2435a30f8f8cSSatish Balay 
2436a30f8f8cSSatish Balay   a->size         = oldmat->size;
2437a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2438a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2439a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2440f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2441f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2442a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2443f4259b30SLisandro Dalcin   a->barray       = NULL;
2444899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2445899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2446899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2447899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2448a30f8f8cSSatish Balay 
2449a30f8f8cSSatish Balay   /* hash table stuff */
2450f4259b30SLisandro Dalcin   a->ht           = NULL;
2451f4259b30SLisandro Dalcin   a->hd           = NULL;
2452a30f8f8cSSatish Balay   a->ht_size      = 0;
2453a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2454a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2455a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2456a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2457a30f8f8cSSatish Balay 
24589566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2459a30f8f8cSSatish Balay   if (oldmat->colmap) {
2460a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2461eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2462a30f8f8cSSatish Balay #else
24639566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24649566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2465a30f8f8cSSatish Balay #endif
2466f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2467387bc808SHong Zhang 
2468a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24709566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2471f4259b30SLisandro Dalcin   } else a->garray = NULL;
2472a30f8f8cSSatish Balay 
24739566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24749566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24759566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
247682327fa8SHong Zhang 
24779566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24789566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2479387bc808SHong Zhang 
24809566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24829566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24839566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24869566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
24879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2488387bc808SHong Zhang 
2489387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
24909566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2491387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
249282327fa8SHong Zhang 
24939566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
24949566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
24959566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2496a30f8f8cSSatish Balay   *newmat = mat;
24973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2498a30f8f8cSSatish Balay }
2499a30f8f8cSSatish Balay 
2500618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2501618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2502618cc2edSLisandro Dalcin 
2503d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2504d71ae5a4SJacob Faibussowitsch {
25057f489da9SVaclav Hapla   PetscBool isbinary;
250695936485SShri Abhyankar 
250795936485SShri Abhyankar   PetscFunctionBegin;
25089566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25095f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25109566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
25113ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
251295936485SShri Abhyankar }
251395936485SShri Abhyankar 
2514d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2515d71ae5a4SJacob Faibussowitsch {
251624d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2517f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2518ca54ac64SHong Zhang   PetscReal     atmp;
251987828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25201302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25211302d50aSBarry Smith   PetscMPIInt   rank, size;
25221302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
252387828ca2SBarry Smith   PetscScalar  *va;
25248a1c53f2SBarry Smith   MatScalar    *ba;
2525f4c0e9e4SHong Zhang   MPI_Status    stat;
252624d5174aSHong Zhang 
252724d5174aSHong Zhang   PetscFunctionBegin;
25285f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25299566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25309566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2531f4c0e9e4SHong Zhang 
25329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2534f4c0e9e4SHong Zhang 
2535d0f46423SBarry Smith   bs  = A->rmap->bs;
2536f4c0e9e4SHong Zhang   mbs = a->mbs;
2537f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2538f4c0e9e4SHong Zhang   ba  = b->a;
2539f4c0e9e4SHong Zhang   bi  = b->i;
2540f4c0e9e4SHong Zhang   bj  = b->j;
2541f4c0e9e4SHong Zhang 
2542f4c0e9e4SHong Zhang   /* find ownerships */
2543d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2544f4c0e9e4SHong Zhang 
2545f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25469566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2547f4c0e9e4SHong Zhang 
2548f4c0e9e4SHong Zhang   /* row_max for B */
2549b8475685SHong Zhang   if (rank != size - 1) {
2550f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25519371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25529371c9d4SSatish Balay       bi++;
2553f4c0e9e4SHong Zhang       brow = bs * i;
2554f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2555f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2556f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2557ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
255804d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2559f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25609371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25619371c9d4SSatish Balay             ba++;
2562ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2563ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2564f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2565f4c0e9e4SHong Zhang           }
2566f4c0e9e4SHong Zhang         }
2567f4c0e9e4SHong Zhang         bj++;
2568f4c0e9e4SHong Zhang       }
2569f4c0e9e4SHong Zhang     }
2570f4c0e9e4SHong Zhang 
2571f4c0e9e4SHong Zhang     /* send values to its owners */
2572f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2573f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2574ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25759566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2576ca54ac64SHong Zhang     }
2577f4c0e9e4SHong Zhang   }
2578f4c0e9e4SHong Zhang 
2579f4c0e9e4SHong Zhang   /* receive values */
2580ca54ac64SHong Zhang   if (rank) {
2581f4c0e9e4SHong Zhang     rvalues = work;
2582ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2583f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25849566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2585f4c0e9e4SHong Zhang       /* process values */
2586f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2587ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2588f4c0e9e4SHong Zhang       }
2589f4c0e9e4SHong Zhang     }
2590ca54ac64SHong Zhang   }
2591f4c0e9e4SHong Zhang 
25929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
25939566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
25943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259524d5174aSHong Zhang }
25962798e883SHong Zhang 
2597d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2598d71ae5a4SJacob Faibussowitsch {
25992798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2600d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
26013649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2602ffe4fb16SHong Zhang   Vec                bb1;
26033649974fSBarry Smith   const PetscScalar *b;
2604ffe4fb16SHong Zhang 
2605ffe4fb16SHong Zhang   PetscFunctionBegin;
26065f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26075f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2608ffe4fb16SHong Zhang 
2609a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26109566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
26113ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2612a2b30743SBarry Smith   }
2613a2b30743SBarry Smith 
2614ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2615ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26169566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2617ffe4fb16SHong Zhang       its--;
2618ffe4fb16SHong Zhang     }
2619ffe4fb16SHong Zhang 
26209566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2621ffe4fb16SHong Zhang     while (its--) {
2622ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26239566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2624ffe4fb16SHong Zhang 
2625ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26269566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26279566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26289566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26299566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2630ffe4fb16SHong Zhang 
26319566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2632ffe4fb16SHong Zhang 
2633ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26349566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26359566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26369566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26379566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2638ffe4fb16SHong Zhang 
2639ffe4fb16SHong Zhang       /* set slvec1b = 0 */
26409566063dSJacob Faibussowitsch       PetscCall(VecSet(mat->slvec1b, 0.0));
2641ffe4fb16SHong Zhang 
26429566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26439566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26449566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26459566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2646ffe4fb16SHong Zhang 
2647ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26489566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2649ffe4fb16SHong Zhang 
2650ffe4fb16SHong Zhang       /* local diagonal sweep */
26519566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2652ffe4fb16SHong Zhang     }
26539566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2654fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26559566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2656fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26579566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2658fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2659fa22f6d0SBarry Smith     Vec                xx1;
2660ace3abfcSBarry Smith     PetscBool          hasop;
266120f1ed55SBarry Smith     const PetscScalar *diag;
2662887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
266320f1ed55SBarry Smith     PetscInt           i, n;
2664fa22f6d0SBarry Smith 
2665fa22f6d0SBarry Smith     if (!mat->xx1) {
26669566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26679566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2668fa22f6d0SBarry Smith     }
2669fa22f6d0SBarry Smith     xx1 = mat->xx1;
2670fa22f6d0SBarry Smith     bb1 = mat->bb1;
2671fa22f6d0SBarry Smith 
26729566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2673fa22f6d0SBarry Smith 
2674fa22f6d0SBarry Smith     if (!mat->diag) {
2675effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26769566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26779566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2678fa22f6d0SBarry Smith     }
26799566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2680fa22f6d0SBarry Smith 
2681fa22f6d0SBarry Smith     if (hasop) {
26829566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26839566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
268420f1ed55SBarry Smith     } else {
268520f1ed55SBarry Smith       /*
268620f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
26879566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
26889566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
268920f1ed55SBarry Smith       */
26909566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
26919566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
26929566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26939566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26949566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2695887ee2caSBarry Smith       if (omega == 1.0) {
269626fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
26979566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2698887ee2caSBarry Smith       } else {
269926fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
27009566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2701887ee2caSBarry Smith       }
27029566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
27039566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27049566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27059566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
270620f1ed55SBarry Smith     }
2707fa22f6d0SBarry Smith 
2708fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
27099566063dSJacob Faibussowitsch     PetscCall(VecSet(mat->slvec1b, 0.0));
27109566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27119566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27129566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27139566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27149566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27169566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27179566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27189566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2719fa22f6d0SBarry Smith 
2720fa22f6d0SBarry Smith     /* local sweep */
27219566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27229566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2723f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
27243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2725ffe4fb16SHong Zhang }
2726ffe4fb16SHong Zhang 
2727dfb205c3SBarry Smith /*@
272811a5261eSBarry Smith      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2729dfb205c3SBarry Smith          CSR format the local rows.
2730dfb205c3SBarry Smith 
2731d083f849SBarry Smith    Collective
2732dfb205c3SBarry Smith 
2733dfb205c3SBarry Smith    Input Parameters:
2734dfb205c3SBarry Smith +  comm - MPI communicator
2735dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
273611a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2737dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
273811a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2739dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
274011a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
274111a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2742483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2743dfb205c3SBarry Smith .   j - column indices
2744dfb205c3SBarry Smith -   a - matrix values
2745dfb205c3SBarry Smith 
2746dfb205c3SBarry Smith    Output Parameter:
2747dfb205c3SBarry Smith .   mat - the matrix
2748dfb205c3SBarry Smith 
2749dfb205c3SBarry Smith    Level: intermediate
2750dfb205c3SBarry Smith 
2751dfb205c3SBarry Smith    Notes:
2752dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2753dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2754dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2755dfb205c3SBarry Smith 
2756dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2757dfb205c3SBarry Smith 
275811a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2759db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2760dfb205c3SBarry Smith @*/
2761d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2762d71ae5a4SJacob Faibussowitsch {
2763dfb205c3SBarry Smith   PetscFunctionBegin;
27645f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27655f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27669566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27679566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27689566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27699566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
27703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2771dfb205c3SBarry Smith }
2772dfb205c3SBarry Smith 
2773dfb205c3SBarry Smith /*@C
277411a5261eSBarry Smith    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2775dfb205c3SBarry Smith 
2776d083f849SBarry Smith    Collective
2777dfb205c3SBarry Smith 
2778dfb205c3SBarry Smith    Input Parameters:
27791c4f3114SJed Brown +  B - the matrix
2780dfb205c3SBarry Smith .  bs - the block size
2781dfb205c3SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2782dfb205c3SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2783dfb205c3SBarry Smith -  v - optional values in the matrix
2784dfb205c3SBarry Smith 
2785664954b6SBarry Smith    Level: advanced
2786664954b6SBarry Smith 
2787664954b6SBarry Smith    Notes:
27880cd7f59aSBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
27890cd7f59aSBarry Smith    and usually the numerical values as well
27900cd7f59aSBarry Smith 
279150c5228eSBarry Smith    Any entries below the diagonal are ignored
2792dfb205c3SBarry Smith 
279311a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2794dfb205c3SBarry Smith @*/
2795d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2796d71ae5a4SJacob Faibussowitsch {
2797dfb205c3SBarry Smith   PetscFunctionBegin;
2798cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
27993ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2800dfb205c3SBarry Smith }
2801dfb205c3SBarry Smith 
2802d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2803d71ae5a4SJacob Faibussowitsch {
280410c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
280510c56fdeSHong Zhang   PetscInt    *indx;
280610c56fdeSHong Zhang   PetscScalar *values;
2807dfb205c3SBarry Smith 
28084dcd73b1SHong Zhang   PetscFunctionBegin;
28099566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
281010c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
281110c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2812de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
281310c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2814de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28154dcd73b1SHong Zhang 
28169566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28179371c9d4SSatish Balay     mbs = m / bs;
28189371c9d4SSatish Balay     Nbs = N / cbs;
281948a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2820da91a574SPierre Jolivet     nbs = n / cbs;
28214dcd73b1SHong Zhang 
28229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2823d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2824de25e9cbSPierre Jolivet 
28259566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28269566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2827de25e9cbSPierre Jolivet     if (rank == size - 1) {
2828de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28295f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2830de25e9cbSPierre Jolivet     }
2831de25e9cbSPierre Jolivet 
2832d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28339566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
283410c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28359566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28364dcd73b1SHong Zhang       nnz = nnz / bs;
28374dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28389566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28399566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28404dcd73b1SHong Zhang     }
28419566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28429566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28434dcd73b1SHong Zhang 
28449566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28459566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28469566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28479566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28489566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28499566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2850d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28514dcd73b1SHong Zhang   }
28524dcd73b1SHong Zhang 
285310c56fdeSHong Zhang   /* numeric phase */
28549566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28559566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28564dcd73b1SHong Zhang 
28579566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28584dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28599566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28604dcd73b1SHong Zhang     Ii = i + rstart;
28619566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28629566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28634dcd73b1SHong Zhang   }
28649566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28659566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28669566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28673ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28684dcd73b1SHong Zhang }
2869