xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 1cc06b555e92f8ec64db10330b8bbd830e5bc876)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
5a30f8f8cSSatish Balay 
626cec326SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
726cec326SBarry Smith {
826cec326SBarry Smith   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
926cec326SBarry Smith 
1026cec326SBarry Smith   PetscFunctionBegin;
1126cec326SBarry Smith #if defined(PETSC_USE_LOG)
1226cec326SBarry Smith   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
1326cec326SBarry Smith #endif
1426cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->stash));
1526cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->bstash));
1626cec326SBarry Smith   PetscCall(MatDestroy(&baij->A));
1726cec326SBarry Smith   PetscCall(MatDestroy(&baij->B));
1826cec326SBarry Smith #if defined(PETSC_USE_CTABLE)
1926cec326SBarry Smith   PetscCall(PetscHMapIDestroy(&baij->colmap));
2026cec326SBarry Smith #else
2126cec326SBarry Smith   PetscCall(PetscFree(baij->colmap));
2226cec326SBarry Smith #endif
2326cec326SBarry Smith   PetscCall(PetscFree(baij->garray));
2426cec326SBarry Smith   PetscCall(VecDestroy(&baij->lvec));
2526cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->Mvctx));
2626cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0));
2726cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0b));
2826cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1));
2926cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1a));
3026cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1b));
3126cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->sMvctx));
3226cec326SBarry Smith   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
3326cec326SBarry Smith   PetscCall(PetscFree(baij->barray));
3426cec326SBarry Smith   PetscCall(PetscFree(baij->hd));
3526cec326SBarry Smith   PetscCall(VecDestroy(&baij->diag));
3626cec326SBarry Smith   PetscCall(VecDestroy(&baij->bb1));
3726cec326SBarry Smith   PetscCall(VecDestroy(&baij->xx1));
3826cec326SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
3926cec326SBarry Smith   PetscCall(PetscFree(baij->setvaluescopy));
4026cec326SBarry Smith #endif
4126cec326SBarry Smith   PetscCall(PetscFree(baij->in_loc));
4226cec326SBarry Smith   PetscCall(PetscFree(baij->v_loc));
4326cec326SBarry Smith   PetscCall(PetscFree(baij->rangebs));
4426cec326SBarry Smith   PetscCall(PetscFree(mat->data));
4526cec326SBarry Smith 
4626cec326SBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
4726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
4826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
4926cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
5026cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
5126cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL)
5226cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
5326cec326SBarry Smith #endif
5426cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK)
5526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
5626cec326SBarry Smith #endif
5726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
5826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
5926cec326SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
6026cec326SBarry Smith }
6126cec326SBarry Smith 
6226cec326SBarry Smith /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
6326cec326SBarry Smith #define TYPE SBAIJ
6426cec326SBarry Smith #define TYPE_SBAIJ
6526cec326SBarry Smith #include "../src/mat/impls/aij/mpi/mpihashmat.h"
6626cec326SBarry Smith #undef TYPE
6726cec326SBarry Smith #undef TYPE_SBAIJ
6826cec326SBarry Smith 
696214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
70cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
716214f412SHong Zhang #endif
72d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
73d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74d24d4204SJose E. Roman #endif
75b147fbf3SStefano Zampini 
76b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
77d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78d71ae5a4SJacob Faibussowitsch {
79b147fbf3SStefano Zampini   Mat       preallocator;
80b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
81b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
82b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
83b147fbf3SStefano Zampini 
84b147fbf3SStefano Zampini   PetscFunctionBegin;
85b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
87b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
88b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
905f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91b147fbf3SStefano Zampini   }
92b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
939566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
949566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
959566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
969566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
979566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
1009566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
1019566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
103b147fbf3SStefano Zampini     PetscInt           ncols;
104b147fbf3SStefano Zampini     const PetscInt    *row;
105b147fbf3SStefano Zampini     const PetscScalar *vals;
106b147fbf3SStefano Zampini 
107b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
1089566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
1099566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
11048a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
1119566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112b147fbf3SStefano Zampini     }
113b147fbf3SStefano Zampini   }
1149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
1159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
1169566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
1179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
1183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119b147fbf3SStefano Zampini }
120b147fbf3SStefano Zampini 
121d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122d71ae5a4SJacob Faibussowitsch {
123b147fbf3SStefano Zampini   Mat      B;
124b147fbf3SStefano Zampini   PetscInt r;
125b147fbf3SStefano Zampini 
126b147fbf3SStefano Zampini   PetscFunctionBegin;
127b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
12828d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
129b147fbf3SStefano Zampini     PetscInt  bs;
130b147fbf3SStefano Zampini 
1319566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1329566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1339566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
1349566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
1359566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
1369566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
1379566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
1389566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
13928d58a37SPierre Jolivet     if (!isdense) {
1409566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
1419566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
1429566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
14328d58a37SPierre Jolivet     } else {
1449566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
14528d58a37SPierre Jolivet     }
14628d58a37SPierre Jolivet   } else {
14728d58a37SPierre Jolivet     B = *newmat;
1489566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
14928d58a37SPierre Jolivet   }
150b147fbf3SStefano Zampini 
1519566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
152b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153b147fbf3SStefano Zampini     PetscInt           ncols;
154b147fbf3SStefano Zampini     const PetscInt    *row;
155b147fbf3SStefano Zampini     const PetscScalar *vals;
156b147fbf3SStefano Zampini 
1579566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
1589566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
160b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
161eb1ec7c1SStefano Zampini       PetscInt i;
16248a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163eb1ec7c1SStefano Zampini     } else {
1649566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165eb1ec7c1SStefano Zampini     }
166eb1ec7c1SStefano Zampini #else
1679566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168eb1ec7c1SStefano Zampini #endif
1699566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170b147fbf3SStefano Zampini   }
1719566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174b147fbf3SStefano Zampini 
175b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1769566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
177b147fbf3SStefano Zampini   } else {
178b147fbf3SStefano Zampini     *newmat = B;
179b147fbf3SStefano Zampini   }
1803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181b147fbf3SStefano Zampini }
182b147fbf3SStefano Zampini 
183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184d71ae5a4SJacob Faibussowitsch {
185f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186a30f8f8cSSatish Balay 
187a30f8f8cSSatish Balay   PetscFunctionBegin;
1889566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1899566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191a30f8f8cSSatish Balay }
192a30f8f8cSSatish Balay 
193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194d71ae5a4SJacob Faibussowitsch {
195f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196a30f8f8cSSatish Balay 
197a30f8f8cSSatish Balay   PetscFunctionBegin;
1989566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1999566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
2003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
201a30f8f8cSSatish Balay }
202a30f8f8cSSatish Balay 
203d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204a30f8f8cSSatish Balay   { \
205a30f8f8cSSatish Balay     brow = row / bs; \
2069371c9d4SSatish Balay     rp   = aj + ai[brow]; \
2079371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
2089371c9d4SSatish Balay     rmax = aimax[brow]; \
2099371c9d4SSatish Balay     nrow = ailen[brow]; \
210a30f8f8cSSatish Balay     bcol = col / bs; \
2119371c9d4SSatish Balay     ridx = row % bs; \
2129371c9d4SSatish Balay     cidx = col % bs; \
2139371c9d4SSatish Balay     low  = 0; \
2149371c9d4SSatish Balay     high = nrow; \
215a30f8f8cSSatish Balay     while (high - low > 3) { \
216a30f8f8cSSatish Balay       t = (low + high) / 2; \
217a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
218a30f8f8cSSatish Balay       else low = t; \
219a30f8f8cSSatish Balay     } \
220a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
221a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
222a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
223a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
224a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
225a30f8f8cSSatish Balay         else *bap = value; \
226a30f8f8cSSatish Balay         goto a_noinsert; \
227a30f8f8cSSatish Balay       } \
228a30f8f8cSSatish Balay     } \
229a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
2305f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232a30f8f8cSSatish Balay     N = nrow++ - 1; \
233a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2349566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2359566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2369566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
238a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
239e56f5c9eSBarry Smith     A->nonzerostate++; \
240a30f8f8cSSatish Balay   a_noinsert:; \
241a30f8f8cSSatish Balay     ailen[brow] = nrow; \
242a30f8f8cSSatish Balay   }
243e5e170daSBarry Smith 
244d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245a30f8f8cSSatish Balay   { \
246a30f8f8cSSatish Balay     brow = row / bs; \
2479371c9d4SSatish Balay     rp   = bj + bi[brow]; \
2489371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
2499371c9d4SSatish Balay     rmax = bimax[brow]; \
2509371c9d4SSatish Balay     nrow = bilen[brow]; \
251a30f8f8cSSatish Balay     bcol = col / bs; \
2529371c9d4SSatish Balay     ridx = row % bs; \
2539371c9d4SSatish Balay     cidx = col % bs; \
2549371c9d4SSatish Balay     low  = 0; \
2559371c9d4SSatish Balay     high = nrow; \
256a30f8f8cSSatish Balay     while (high - low > 3) { \
257a30f8f8cSSatish Balay       t = (low + high) / 2; \
258a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
259a30f8f8cSSatish Balay       else low = t; \
260a30f8f8cSSatish Balay     } \
261a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
262a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
263a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
264a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
265a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
266a30f8f8cSSatish Balay         else *bap = value; \
267a30f8f8cSSatish Balay         goto b_noinsert; \
268a30f8f8cSSatish Balay       } \
269a30f8f8cSSatish Balay     } \
270a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2715f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273a30f8f8cSSatish Balay     N = nrow++ - 1; \
274a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2759566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2769566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2779566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
279a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
280e56f5c9eSBarry Smith     B->nonzerostate++; \
281a30f8f8cSSatish Balay   b_noinsert:; \
282a30f8f8cSSatish Balay     bilen[brow] = nrow; \
283a30f8f8cSSatish Balay   }
284a30f8f8cSSatish Balay 
285a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
286da81f932SPierre Jolivet    Any a(i,j) with i>j input by user is ignored or generates an error
287a30f8f8cSSatish Balay */
288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289d71ae5a4SJacob Faibussowitsch {
290a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291a30f8f8cSSatish Balay   MatScalar     value;
292ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2931302d50aSBarry Smith   PetscInt      i, j, row, col;
294d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
295d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297a30f8f8cSSatish Balay 
298a30f8f8cSSatish Balay   /* Some Variables required in the macro */
299a30f8f8cSSatish Balay   Mat           A     = baij->A;
300a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
3011302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
303a30f8f8cSSatish Balay 
304a30f8f8cSSatish Balay   Mat          B     = baij->B;
305a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
3061302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
308a30f8f8cSSatish Balay 
3091302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
3101302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311a30f8f8cSSatish Balay   MatScalar *ap, *bap;
312a30f8f8cSSatish Balay 
313a30f8f8cSSatish Balay   /* for stash */
3140298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
3150298fd71SBarry Smith   MatScalar *v_loc = NULL;
316a30f8f8cSSatish Balay 
317a30f8f8cSSatish Balay   PetscFunctionBegin;
318a30f8f8cSSatish Balay   if (!baij->donotstash) {
31959ffdab8SBarry Smith     if (n > baij->n_loc) {
3209566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
3219566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
3229566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
3239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
32426fbe8dcSKarl Rupp 
32559ffdab8SBarry Smith       baij->n_loc = n;
32659ffdab8SBarry Smith     }
32759ffdab8SBarry Smith     in_loc = baij->in_loc;
32859ffdab8SBarry Smith     v_loc  = baij->v_loc;
329a30f8f8cSSatish Balay   }
330a30f8f8cSSatish Balay 
331a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
332a30f8f8cSSatish Balay     if (im[i] < 0) continue;
3335f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
336a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
33701b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
33801b2bd88SHong Zhang           if (a->ignore_ltriangular) {
33901b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
34026fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
34101b2bd88SHong Zhang         }
342a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
3449371c9d4SSatish Balay           brow = row / bs;
3459371c9d4SSatish Balay           bcol = col / bs;
346a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
348db4deed7SKarl Rupp           else value = v[i + j * m];
349d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
3509566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
352f7d195e4SLawrence Mitchell           continue;
353f7d195e4SLawrence Mitchell         } else {
354f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
356a30f8f8cSSatish Balay           if (mat->was_assembled) {
35748a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
359eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
36071730473SSatish Balay             col = col - 1;
361a30f8f8cSSatish Balay #else
36271730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
363a30f8f8cSSatish Balay #endif
364a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3659566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366a30f8f8cSSatish Balay               col = in[j];
367a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368a30f8f8cSSatish Balay               B     = baij->B;
369a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3709371c9d4SSatish Balay               bimax = b->imax;
3719371c9d4SSatish Balay               bi    = b->i;
3729371c9d4SSatish Balay               bilen = b->ilen;
3739371c9d4SSatish Balay               bj    = b->j;
374a30f8f8cSSatish Balay               ba    = b->a;
37571730473SSatish Balay             } else col += in[j] % bs;
376a30f8f8cSSatish Balay           } else col = in[j];
377db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
378db4deed7SKarl Rupp           else value = v[i + j * m];
379d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3809566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381a30f8f8cSSatish Balay         }
382a30f8f8cSSatish Balay       }
383a30f8f8cSSatish Balay     } else { /* off processor entry */
3845f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385a30f8f8cSSatish Balay       if (!baij->donotstash) {
3865080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
387a30f8f8cSSatish Balay         n_loc          = 0;
388a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
389f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
391a30f8f8cSSatish Balay           if (roworiented) {
392a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
393a30f8f8cSSatish Balay           } else {
394a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
395a30f8f8cSSatish Balay           }
396a30f8f8cSSatish Balay           n_loc++;
397a30f8f8cSSatish Balay         }
3989566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399a30f8f8cSSatish Balay       }
400a30f8f8cSSatish Balay     }
401a30f8f8cSSatish Balay   }
4023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
403a30f8f8cSSatish Balay }
404a30f8f8cSSatish Balay 
405d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406d71ae5a4SJacob Faibussowitsch {
40736bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
40836bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
40936bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
41036bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
41136bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
41236bd2089SBarry Smith   const PetscScalar *value       = v;
41336bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
41436bd2089SBarry Smith 
41536bd2089SBarry Smith   PetscFunctionBegin;
41636bd2089SBarry Smith   if (col < row) {
4173ba16761SJacob Faibussowitsch     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
41836bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
41936bd2089SBarry Smith   }
42036bd2089SBarry Smith   rp    = aj + ai[row];
42136bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
42236bd2089SBarry Smith   rmax  = imax[row];
42336bd2089SBarry Smith   nrow  = ailen[row];
42436bd2089SBarry Smith   value = v;
42536bd2089SBarry Smith   low   = 0;
42636bd2089SBarry Smith   high  = nrow;
42736bd2089SBarry Smith 
42836bd2089SBarry Smith   while (high - low > 7) {
42936bd2089SBarry Smith     t = (low + high) / 2;
43036bd2089SBarry Smith     if (rp[t] > col) high = t;
43136bd2089SBarry Smith     else low = t;
43236bd2089SBarry Smith   }
43336bd2089SBarry Smith   for (i = low; i < high; i++) {
43436bd2089SBarry Smith     if (rp[i] > col) break;
43536bd2089SBarry Smith     if (rp[i] == col) {
43636bd2089SBarry Smith       bap = ap + bs2 * i;
43736bd2089SBarry Smith       if (roworiented) {
43836bd2089SBarry Smith         if (is == ADD_VALUES) {
43936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
440ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
44136bd2089SBarry Smith           }
44236bd2089SBarry Smith         } else {
44336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
444ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
44536bd2089SBarry Smith           }
44636bd2089SBarry Smith         }
44736bd2089SBarry Smith       } else {
44836bd2089SBarry Smith         if (is == ADD_VALUES) {
44936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
450ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
45136bd2089SBarry Smith           }
45236bd2089SBarry Smith         } else {
45336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
454ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
45536bd2089SBarry Smith           }
45636bd2089SBarry Smith         }
45736bd2089SBarry Smith       }
45836bd2089SBarry Smith       goto noinsert2;
45936bd2089SBarry Smith     }
46036bd2089SBarry Smith   }
46136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4625f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
46336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4649371c9d4SSatish Balay   N = nrow++ - 1;
4659371c9d4SSatish Balay   high++;
46636bd2089SBarry Smith   /* shift up all the later entries in this row */
4679566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4689566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
46936bd2089SBarry Smith   rp[i] = col;
47036bd2089SBarry Smith   bap   = ap + bs2 * i;
47136bd2089SBarry Smith   if (roworiented) {
47236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
473ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
47436bd2089SBarry Smith     }
47536bd2089SBarry Smith   } else {
47636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
477ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
47836bd2089SBarry Smith     }
47936bd2089SBarry Smith   }
48036bd2089SBarry Smith noinsert2:;
48136bd2089SBarry Smith   ailen[row] = nrow;
4823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
48336bd2089SBarry Smith }
48436bd2089SBarry Smith 
48536bd2089SBarry Smith /*
48636bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
48736bd2089SBarry Smith */
488d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489d71ae5a4SJacob Faibussowitsch {
49036bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
49136bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
49236bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
49336bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
49436bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
49536bd2089SBarry Smith   const PetscScalar *value       = v;
49636bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
49736bd2089SBarry Smith 
49836bd2089SBarry Smith   PetscFunctionBegin;
49936bd2089SBarry Smith   rp    = aj + ai[row];
50036bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
50136bd2089SBarry Smith   rmax  = imax[row];
50236bd2089SBarry Smith   nrow  = ailen[row];
50336bd2089SBarry Smith   low   = 0;
50436bd2089SBarry Smith   high  = nrow;
50536bd2089SBarry Smith   value = v;
50636bd2089SBarry Smith   while (high - low > 7) {
50736bd2089SBarry Smith     t = (low + high) / 2;
50836bd2089SBarry Smith     if (rp[t] > col) high = t;
50936bd2089SBarry Smith     else low = t;
51036bd2089SBarry Smith   }
51136bd2089SBarry Smith   for (i = low; i < high; i++) {
51236bd2089SBarry Smith     if (rp[i] > col) break;
51336bd2089SBarry Smith     if (rp[i] == col) {
51436bd2089SBarry Smith       bap = ap + bs2 * i;
51536bd2089SBarry Smith       if (roworiented) {
51636bd2089SBarry Smith         if (is == ADD_VALUES) {
51736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
518ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
51936bd2089SBarry Smith           }
52036bd2089SBarry Smith         } else {
52136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
522ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
52336bd2089SBarry Smith           }
52436bd2089SBarry Smith         }
52536bd2089SBarry Smith       } else {
52636bd2089SBarry Smith         if (is == ADD_VALUES) {
52736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
528ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
52936bd2089SBarry Smith             bap += bs;
53036bd2089SBarry Smith           }
53136bd2089SBarry Smith         } else {
53236bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
533ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
53436bd2089SBarry Smith             bap += bs;
53536bd2089SBarry Smith           }
53636bd2089SBarry Smith         }
53736bd2089SBarry Smith       }
53836bd2089SBarry Smith       goto noinsert2;
53936bd2089SBarry Smith     }
54036bd2089SBarry Smith   }
54136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
5425f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
54336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
5449371c9d4SSatish Balay   N = nrow++ - 1;
5459371c9d4SSatish Balay   high++;
54636bd2089SBarry Smith   /* shift up all the later entries in this row */
5479566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
5489566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
54936bd2089SBarry Smith   rp[i] = col;
55036bd2089SBarry Smith   bap   = ap + bs2 * i;
55136bd2089SBarry Smith   if (roworiented) {
55236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
553ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
55436bd2089SBarry Smith     }
55536bd2089SBarry Smith   } else {
55636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
557ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
55836bd2089SBarry Smith     }
55936bd2089SBarry Smith   }
56036bd2089SBarry Smith noinsert2:;
56136bd2089SBarry Smith   ailen[row] = nrow;
5623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56336bd2089SBarry Smith }
56436bd2089SBarry Smith 
56536bd2089SBarry Smith /*
56636bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
56736bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
56836bd2089SBarry Smith */
569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570d71ae5a4SJacob Faibussowitsch {
5710880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572f15d580aSBarry Smith   const MatScalar *value;
573f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
574ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5780880e062SHong Zhang 
579a30f8f8cSSatish Balay   PetscFunctionBegin;
5800880e062SHong Zhang   if (!barray) {
5819566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5820880e062SHong Zhang     baij->barray = barray;
5830880e062SHong Zhang   }
5840880e062SHong Zhang 
5850880e062SHong Zhang   if (roworiented) {
5860880e062SHong Zhang     stepval = (n - 1) * bs;
5870880e062SHong Zhang   } else {
5880880e062SHong Zhang     stepval = (m - 1) * bs;
5890880e062SHong Zhang   }
5900880e062SHong Zhang   for (i = 0; i < m; i++) {
5910880e062SHong Zhang     if (im[i] < 0) continue;
5926bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5930880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5940880e062SHong Zhang       row = im[i] - rstart;
5950880e062SHong Zhang       for (j = 0; j < n; j++) {
596f3f98c53SJed Brown         if (im[i] > in[j]) {
597f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599f3f98c53SJed Brown         }
6000880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
6010880e062SHong Zhang         if ((roworiented) && (n == 1)) {
602f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
6030880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
604f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
6050880e062SHong Zhang         } else { /* Here a copy is required */
6060880e062SHong Zhang           if (roworiented) {
6070880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
6080880e062SHong Zhang           } else {
6090880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
6100880e062SHong Zhang           }
6110880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
612ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
6130880e062SHong Zhang           }
6140880e062SHong Zhang           barray -= bs2;
6150880e062SHong Zhang         }
6160880e062SHong Zhang 
6170880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
6180880e062SHong Zhang           col = in[j] - cstart;
6199566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
621f7d195e4SLawrence Mitchell           continue;
622f7d195e4SLawrence Mitchell         } else {
623f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
6240880e062SHong Zhang           if (mat->was_assembled) {
62548a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
6260880e062SHong Zhang 
6272515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
6280880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
6299371c9d4SSatish Balay             {
6309371c9d4SSatish Balay               PetscInt data;
631eec179cfSJacob Faibussowitsch               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
63208401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6330880e062SHong Zhang             }
6340880e062SHong Zhang   #else
63508401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6360880e062SHong Zhang   #endif
6370880e062SHong Zhang #endif
6380880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
639eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
6400880e062SHong Zhang             col = (col - 1) / bs;
6410880e062SHong Zhang #else
6420880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
6430880e062SHong Zhang #endif
6440880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
6459566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
6460880e062SHong Zhang               col = in[j];
6470880e062SHong Zhang             }
64826fbe8dcSKarl Rupp           } else col = in[j];
6499566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
6500880e062SHong Zhang         }
6510880e062SHong Zhang       }
6520880e062SHong Zhang     } else {
6535f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
6540880e062SHong Zhang       if (!baij->donotstash) {
6550880e062SHong Zhang         if (roworiented) {
6569566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6570880e062SHong Zhang         } else {
6589566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6590880e062SHong Zhang         }
6600880e062SHong Zhang       }
6610880e062SHong Zhang     }
6620880e062SHong Zhang   }
6633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
664a30f8f8cSSatish Balay }
665a30f8f8cSSatish Balay 
666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667d71ae5a4SJacob Faibussowitsch {
668f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671a30f8f8cSSatish Balay 
672a30f8f8cSSatish Balay   PetscFunctionBegin;
673a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
67454c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
67554c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
678a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
67954c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
68054c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6839566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684a30f8f8cSSatish Balay         } else {
68548a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
687eec179cfSJacob Faibussowitsch           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688a30f8f8cSSatish Balay           data--;
689a30f8f8cSSatish Balay #else
690a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
691a30f8f8cSSatish Balay #endif
692a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693a30f8f8cSSatish Balay           else {
694a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6959566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696a30f8f8cSSatish Balay           }
697a30f8f8cSSatish Balay         }
698a30f8f8cSSatish Balay       }
699f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700a30f8f8cSSatish Balay   }
7013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
702a30f8f8cSSatish Balay }
703a30f8f8cSSatish Balay 
704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705d71ae5a4SJacob Faibussowitsch {
706a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
708a30f8f8cSSatish Balay 
709a30f8f8cSSatish Balay   PetscFunctionBegin;
710a30f8f8cSSatish Balay   if (baij->size == 1) {
7119566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
712a30f8f8cSSatish Balay   } else {
713a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
7149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
7159566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
7169371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7179371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
7189566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
7199371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7209371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
7211c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7228f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
7239566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
7240b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
7250b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
7260b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
7270b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
728899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
7300b8dc8d2SHong Zhang       MatScalar    *v;
7310b8dc8d2SHong Zhang 
7329566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
7339566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
7340b8dc8d2SHong Zhang       /* Amat */
7359371c9d4SSatish Balay       v  = amat->a;
7369371c9d4SSatish Balay       jj = amat->j;
7370b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7380b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7390b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
7400b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7419371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
7429371c9d4SSatish Balay           jj++;
7430b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7440b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7459371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7469371c9d4SSatish Balay               v++;
7470b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7480b8dc8d2SHong Zhang               /* non-diagonal block */
7490b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
7500b8dc8d2SHong Zhang             }
7510b8dc8d2SHong Zhang           }
7520b8dc8d2SHong Zhang         }
7539566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7540b8dc8d2SHong Zhang       }
7550b8dc8d2SHong Zhang       /* Bmat */
7569371c9d4SSatish Balay       v  = bmat->a;
7579371c9d4SSatish Balay       jj = bmat->j;
7580b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7590b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7600b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
7610b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7629371c9d4SSatish Balay           gcol = bs * garray[*jj];
7639371c9d4SSatish Balay           jj++;
7640b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7650b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7669371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7679371c9d4SSatish Balay               v++;
7680b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7690b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7700b8dc8d2SHong Zhang             }
7710b8dc8d2SHong Zhang           }
7720b8dc8d2SHong Zhang         }
7739566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7740b8dc8d2SHong Zhang       }
7751c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7760b8dc8d2SHong Zhang       *norm = 0.0;
777d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7780b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7790b8dc8d2SHong Zhang       }
7809566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
781f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782a30f8f8cSSatish Balay   }
7833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
784a30f8f8cSSatish Balay }
785a30f8f8cSSatish Balay 
786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787d71ae5a4SJacob Faibussowitsch {
788a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7891302d50aSBarry Smith   PetscInt      nstash, reallocs;
790a30f8f8cSSatish Balay 
791a30f8f8cSSatish Balay   PetscFunctionBegin;
7923ba16761SJacob Faibussowitsch   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793a30f8f8cSSatish Balay 
7949566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7959566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7969566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7979566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7989566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7999566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
8003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
801a30f8f8cSSatish Balay }
802a30f8f8cSSatish Balay 
803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804d71ae5a4SJacob Faibussowitsch {
805a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
80713f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808e44c0bd4SBarry Smith   PetscInt     *row, *col;
809ace3abfcSBarry Smith   PetscBool     other_disassembled;
81013f74950SBarry Smith   PetscMPIInt   n;
811ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
812a30f8f8cSSatish Balay   MatScalar    *val;
813a30f8f8cSSatish Balay 
81491c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815a30f8f8cSSatish Balay   PetscFunctionBegin;
8164cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
817a30f8f8cSSatish Balay     while (1) {
8189566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819a30f8f8cSSatish Balay       if (!flg) break;
820a30f8f8cSSatish Balay 
821a30f8f8cSSatish Balay       for (i = 0; i < n;) {
822a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
82326fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
82426fbe8dcSKarl Rupp           if (row[j] != rstart) break;
82526fbe8dcSKarl Rupp         }
826a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
827a30f8f8cSSatish Balay         else ncols = n - i;
828a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
8299566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830a30f8f8cSSatish Balay         i = j;
831a30f8f8cSSatish Balay       }
832a30f8f8cSSatish Balay     }
8339566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
835a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
836a30f8f8cSSatish Balay        restore the original flags */
837a30f8f8cSSatish Balay     r1 = baij->roworiented;
838a30f8f8cSSatish Balay     r2 = a->roworiented;
83991c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
84026fbe8dcSKarl Rupp 
841a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
842a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
84326fbe8dcSKarl Rupp 
84491c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845a30f8f8cSSatish Balay     while (1) {
8469566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847a30f8f8cSSatish Balay       if (!flg) break;
848a30f8f8cSSatish Balay 
849a30f8f8cSSatish Balay       for (i = 0; i < n;) {
850a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
85126fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
85226fbe8dcSKarl Rupp           if (row[j] != rstart) break;
85326fbe8dcSKarl Rupp         }
854a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
855a30f8f8cSSatish Balay         else ncols = n - i;
8569566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857a30f8f8cSSatish Balay         i = j;
858a30f8f8cSSatish Balay       }
859a30f8f8cSSatish Balay     }
8609566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
86126fbe8dcSKarl Rupp 
862a30f8f8cSSatish Balay     baij->roworiented = r1;
863a30f8f8cSSatish Balay     a->roworiented    = r2;
86426fbe8dcSKarl Rupp 
86591c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
8689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
870a30f8f8cSSatish Balay 
871a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8726aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
873a30f8f8cSSatish Balay   /*
874a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
875a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
876a30f8f8cSSatish Balay   */
877a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8785f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
87948a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880a30f8f8cSSatish Balay   }
881a30f8f8cSSatish Balay 
8829371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8849566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
885a30f8f8cSSatish Balay 
8869566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
88726fbe8dcSKarl Rupp 
888f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8894f9cfa9eSBarry Smith 
8904f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8914f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8931c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894e56f5c9eSBarry Smith   }
8953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
896a30f8f8cSSatish Balay }
897a30f8f8cSSatish Balay 
898dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8999804daf3SBarry Smith #include <petscdraw.h>
900d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901d71ae5a4SJacob Faibussowitsch {
902a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
9047da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
905ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
906b0a32e0cSBarry Smith   PetscViewer       sviewer;
907f3ef73ceSBarry Smith   PetscViewerFormat format;
908a30f8f8cSSatish Balay 
909a30f8f8cSSatish Balay   PetscFunctionBegin;
9109566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9119566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
91232077d6dSBarry Smith   if (iascii) {
9139566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
914456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915a30f8f8cSSatish Balay       MatInfo info;
9169566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
9179566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
9189566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9199371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
9209371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
9219566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
9229566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9239566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
9249566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9259566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9269566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
9279566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
9289566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
9293ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
930fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
9319566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
9323ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
933c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
9343ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
935a30f8f8cSSatish Balay     }
936a30f8f8cSSatish Balay   }
937a30f8f8cSSatish Balay 
938a30f8f8cSSatish Balay   if (isdraw) {
939b0a32e0cSBarry Smith     PetscDraw draw;
940ace3abfcSBarry Smith     PetscBool isnull;
9419566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
9429566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
9433ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944a30f8f8cSSatish Balay   }
945a30f8f8cSSatish Balay 
9467da1fb6eSBarry Smith   {
947a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
948a30f8f8cSSatish Balay     Mat           A;
94965d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
95065d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
951d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952a30f8f8cSSatish Balay     MatScalar    *a;
9533e219373SBarry Smith     const char   *matname;
954a30f8f8cSSatish Balay 
955f204ca49SKris Buschelman     /* Should this be the same type as mat? */
9569566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957dd400576SPatrick Sanan     if (rank == 0) {
9589566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
959a30f8f8cSSatish Balay     } else {
9609566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
961a30f8f8cSSatish Balay     }
9629566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9639566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9649566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965a30f8f8cSSatish Balay 
966a30f8f8cSSatish Balay     /* copy over the A part */
96765d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9689371c9d4SSatish Balay     ai   = Aloc->i;
9699371c9d4SSatish Balay     aj   = Aloc->j;
9709371c9d4SSatish Balay     a    = Aloc->a;
9719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
972a30f8f8cSSatish Balay 
973a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
974e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
97526fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
977e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
978a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9799566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
98026fbe8dcSKarl Rupp           col++;
98126fbe8dcSKarl Rupp           a += bs;
982a30f8f8cSSatish Balay         }
983a30f8f8cSSatish Balay       }
984a30f8f8cSSatish Balay     }
985a30f8f8cSSatish Balay     /* copy over the B part */
98665d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9879371c9d4SSatish Balay     ai   = Bloc->i;
9889371c9d4SSatish Balay     aj   = Bloc->j;
9899371c9d4SSatish Balay     a    = Bloc->a;
990a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
991e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
99226fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
994a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
995a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9969566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
99726fbe8dcSKarl Rupp           col++;
99826fbe8dcSKarl Rupp           a += bs;
999a30f8f8cSSatish Balay         }
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
10029566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
10039566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
10049566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005a30f8f8cSSatish Balay     /*
1006a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1007b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
1008a30f8f8cSSatish Balay     */
10099566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
101023a3927dSBarry Smith     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011dd400576SPatrick Sanan     if (rank == 0) {
101223a3927dSBarry Smith       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
10139566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014a30f8f8cSSatish Balay     }
10159566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
10169566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
10179566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
1018a30f8f8cSSatish Balay   }
10193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1020a30f8f8cSSatish Balay }
1021a30f8f8cSSatish Balay 
1022618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024d1654148SHong Zhang 
1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026d71ae5a4SJacob Faibussowitsch {
1027ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
1028a30f8f8cSSatish Balay 
1029a30f8f8cSSatish Balay   PetscFunctionBegin;
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
10319566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
10329566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
10339566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034d1654148SHong Zhang   if (iascii || isdraw || issocket) {
10359566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
10361baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
10373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1038a30f8f8cSSatish Balay }
1039a30f8f8cSSatish Balay 
1040d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041d71ae5a4SJacob Faibussowitsch {
1042547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10446de40e93SBarry Smith   PetscScalar       *from;
10456de40e93SBarry Smith   const PetscScalar *x;
1046547795f9SHong Zhang 
1047547795f9SHong Zhang   PetscFunctionBegin;
1048547795f9SHong Zhang   /* diagonal part */
10499566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10509566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1051547795f9SHong Zhang 
1052547795f9SHong Zhang   /* subdiagonal part */
10535f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10549566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1055547795f9SHong Zhang 
1056547795f9SHong Zhang   /* copy x into the vec slvec0 */
10579566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10589566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1059547795f9SHong Zhang 
10609566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10619566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10629566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1063547795f9SHong Zhang 
10649566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10659566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066547795f9SHong Zhang   /* supperdiagonal part */
10679566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1069547795f9SHong Zhang }
1070547795f9SHong Zhang 
1071d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072d71ae5a4SJacob Faibussowitsch {
1073a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1074eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1075d9ca1df4SBarry Smith   PetscScalar       *from;
1076d9ca1df4SBarry Smith   const PetscScalar *x;
1077a9d4b620SHong Zhang 
1078a9d4b620SHong Zhang   PetscFunctionBegin;
1079a9d4b620SHong Zhang   /* diagonal part */
10809566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10819566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1082a9d4b620SHong Zhang 
1083a9d4b620SHong Zhang   /* subdiagonal part */
10849566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1085fc165ae2SBarry Smith 
1086a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10879566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10889566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1089a9d4b620SHong Zhang 
10909566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10919566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1093fc165ae2SBarry Smith 
10949566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10959566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096a9d4b620SHong Zhang   /* supperdiagonal part */
10979566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1099a9d4b620SHong Zhang }
1100a9d4b620SHong Zhang 
1101d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1102d71ae5a4SJacob Faibussowitsch {
1103eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1104eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1105eb1ec7c1SStefano Zampini   PetscScalar       *from, zero       = 0.0;
1106eb1ec7c1SStefano Zampini   const PetscScalar *x;
1107eb1ec7c1SStefano Zampini 
1108eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1109eb1ec7c1SStefano Zampini   /* diagonal part */
11109566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11119566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1112eb1ec7c1SStefano Zampini 
1113eb1ec7c1SStefano Zampini   /* subdiagonal part */
11145f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11159566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1116eb1ec7c1SStefano Zampini 
1117eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11189566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11199566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11209566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11219566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1122eb1ec7c1SStefano Zampini 
11239566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11249566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11259566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1126eb1ec7c1SStefano Zampini 
1127eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11289566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1130eb1ec7c1SStefano Zampini }
1131eb1ec7c1SStefano Zampini 
1132d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133d71ae5a4SJacob Faibussowitsch {
1134de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1135d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1136d9ca1df4SBarry Smith   PetscScalar       *from, zero       = 0.0;
1137d9ca1df4SBarry Smith   const PetscScalar *x;
1138a9d4b620SHong Zhang 
1139a9d4b620SHong Zhang   PetscFunctionBegin;
1140a9d4b620SHong Zhang   /* diagonal part */
11419566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11429566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1143a9d4b620SHong Zhang 
1144a9d4b620SHong Zhang   /* subdiagonal part */
11459566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1146a9d4b620SHong Zhang 
1147a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11489566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11499566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11509566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11519566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1152a9d4b620SHong Zhang 
11539566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11549566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11559566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1156a9d4b620SHong Zhang 
1157a9d4b620SHong Zhang   /* supperdiagonal part */
11589566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11593ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1160a9d4b620SHong Zhang }
1161a9d4b620SHong Zhang 
1162a30f8f8cSSatish Balay /*
1163a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1164a30f8f8cSSatish Balay    diagonal block
1165a30f8f8cSSatish Balay */
1166d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1167d71ae5a4SJacob Faibussowitsch {
1168a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1169a30f8f8cSSatish Balay 
1170a30f8f8cSSatish Balay   PetscFunctionBegin;
117108401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11729566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
11733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1174a30f8f8cSSatish Balay }
1175a30f8f8cSSatish Balay 
1176d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1177d71ae5a4SJacob Faibussowitsch {
1178a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1179a30f8f8cSSatish Balay 
1180a30f8f8cSSatish Balay   PetscFunctionBegin;
11819566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11829566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
11833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1184a30f8f8cSSatish Balay }
1185a30f8f8cSSatish Balay 
1186d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1187d71ae5a4SJacob Faibussowitsch {
1188d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1189d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1190d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1191d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1192899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1193d0d4cfc2SHong Zhang 
1194a30f8f8cSSatish Balay   PetscFunctionBegin;
11955f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1196d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1197d0d4cfc2SHong Zhang 
1198d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1199d0d4cfc2SHong Zhang     /*
1200d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1201d0d4cfc2SHong Zhang     */
1202d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1203d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1204d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1205d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1206d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
120726fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1208d0d4cfc2SHong Zhang     }
12099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1210d0d4cfc2SHong Zhang   }
1211d0d4cfc2SHong Zhang 
12125f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1213d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1214d0d4cfc2SHong Zhang 
12159371c9d4SSatish Balay   pvA = &vworkA;
12169371c9d4SSatish Balay   pcA = &cworkA;
12179371c9d4SSatish Balay   pvB = &vworkB;
12189371c9d4SSatish Balay   pcB = &cworkB;
12199371c9d4SSatish Balay   if (!v) {
12209371c9d4SSatish Balay     pvA = NULL;
12219371c9d4SSatish Balay     pvB = NULL;
12229371c9d4SSatish Balay   }
12239371c9d4SSatish Balay   if (!idx) {
12249371c9d4SSatish Balay     pcA = NULL;
12259371c9d4SSatish Balay     if (!v) pcB = NULL;
12269371c9d4SSatish Balay   }
12279566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12289566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1229d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1230d0d4cfc2SHong Zhang 
1231d0d4cfc2SHong Zhang   cmap = mat->garray;
1232d0d4cfc2SHong Zhang   if (v || idx) {
1233d0d4cfc2SHong Zhang     if (nztot) {
1234d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1235d0d4cfc2SHong Zhang       PetscInt imark = -1;
1236d0d4cfc2SHong Zhang       if (v) {
1237d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1238d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1239d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1240d0d4cfc2SHong Zhang           else break;
1241d0d4cfc2SHong Zhang         }
1242d0d4cfc2SHong Zhang         imark = i;
1243d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1244d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1245d0d4cfc2SHong Zhang       }
1246d0d4cfc2SHong Zhang       if (idx) {
1247d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1248d0d4cfc2SHong Zhang         if (imark > -1) {
1249ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250d0d4cfc2SHong Zhang         } else {
1251d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
125226fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253d0d4cfc2SHong Zhang             else break;
1254d0d4cfc2SHong Zhang           }
1255d0d4cfc2SHong Zhang           imark = i;
1256d0d4cfc2SHong Zhang         }
1257d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1258d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259d0d4cfc2SHong Zhang       }
1260d0d4cfc2SHong Zhang     } else {
1261f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1262f4259b30SLisandro Dalcin       if (v) *v = NULL;
1263d0d4cfc2SHong Zhang     }
1264d0d4cfc2SHong Zhang   }
1265d0d4cfc2SHong Zhang   *nz = nztot;
12669566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12679566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
12683ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1269a30f8f8cSSatish Balay }
1270a30f8f8cSSatish Balay 
1271d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1272d71ae5a4SJacob Faibussowitsch {
1273a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1274a30f8f8cSSatish Balay 
1275a30f8f8cSSatish Balay   PetscFunctionBegin;
12765f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1277a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
12783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1279a30f8f8cSSatish Balay }
1280a30f8f8cSSatish Balay 
1281d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282d71ae5a4SJacob Faibussowitsch {
1283d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1284d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1285d0d4cfc2SHong Zhang 
1286d0d4cfc2SHong Zhang   PetscFunctionBegin;
1287d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
12883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1289d0d4cfc2SHong Zhang }
1290d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291d71ae5a4SJacob Faibussowitsch {
1292d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1293d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1294d0d4cfc2SHong Zhang 
1295d0d4cfc2SHong Zhang   PetscFunctionBegin;
1296d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
12973ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1298d0d4cfc2SHong Zhang }
1299d0d4cfc2SHong Zhang 
1300d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301d71ae5a4SJacob Faibussowitsch {
13025f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
13035f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
13042726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
13052726fb6dSPierre Jolivet 
13069566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13079566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13085f80ce2aSJacob Faibussowitsch   }
13093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13102726fb6dSPierre Jolivet }
13112726fb6dSPierre Jolivet 
1312d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313d71ae5a4SJacob Faibussowitsch {
131499cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
131599cafbc1SBarry Smith 
131699cafbc1SBarry Smith   PetscFunctionBegin;
13179566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13189566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
13193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132099cafbc1SBarry Smith }
132199cafbc1SBarry Smith 
1322d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323d71ae5a4SJacob Faibussowitsch {
132499cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
132599cafbc1SBarry Smith 
132699cafbc1SBarry Smith   PetscFunctionBegin;
13279566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13289566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
13293ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
133099cafbc1SBarry Smith }
133199cafbc1SBarry Smith 
13327dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
133336032a97SHong Zhang    Input: isrow       - distributed(parallel),
133436032a97SHong Zhang           iscol_local - locally owned (seq)
133536032a97SHong Zhang */
1336d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1337d71ae5a4SJacob Faibussowitsch {
13388f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13398f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
134036032a97SHong Zhang 
134136032a97SHong Zhang   PetscFunctionBegin;
13429566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13439566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13441098a8e8SHong Zhang   if (sz1 > sz2) {
13451098a8e8SHong Zhang     *flg = PETSC_FALSE;
13463ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
13471098a8e8SHong Zhang   }
13488f46ffcaSHong Zhang 
13499566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13509566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13518f46ffcaSHong Zhang 
13529566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13539566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13559566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13569566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13579566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13588f46ffcaSHong Zhang 
13598f46ffcaSHong Zhang   nmatch = 0;
13608f46ffcaSHong Zhang   k      = 0;
13618f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13628f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13638f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13649371c9d4SSatish Balay         k = j;
13659371c9d4SSatish Balay         nmatch++;
13668f46ffcaSHong Zhang         break;
13678f46ffcaSHong Zhang       }
13688f46ffcaSHong Zhang     }
13698f46ffcaSHong Zhang   }
13709566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13719566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13729566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13739566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13741098a8e8SHong Zhang   if (nmatch < sz1) {
13751098a8e8SHong Zhang     *flg = PETSC_FALSE;
13761098a8e8SHong Zhang   } else {
13771098a8e8SHong Zhang     *flg = PETSC_TRUE;
13781098a8e8SHong Zhang   }
13793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13808f46ffcaSHong Zhang }
138136032a97SHong Zhang 
1382d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1383d71ae5a4SJacob Faibussowitsch {
138436032a97SHong Zhang   IS        iscol_local;
138536032a97SHong Zhang   PetscInt  csize;
138636032a97SHong Zhang   PetscBool isequal;
138736032a97SHong Zhang 
138836032a97SHong Zhang   PetscFunctionBegin;
13899566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
139036032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13919566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13925f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
139336032a97SHong Zhang   } else {
1394068661f9SToby Isaac     PetscBool issorted;
1395068661f9SToby Isaac 
13969566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
13979566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
13989566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
13995f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
14008f46ffcaSHong Zhang   }
14018f46ffcaSHong Zhang 
14027dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
14039566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
14048f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
14059566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
14069566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14078f46ffcaSHong Zhang   }
14083ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14098f46ffcaSHong Zhang }
14108f46ffcaSHong Zhang 
1411d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412d71ae5a4SJacob Faibussowitsch {
1413a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1414a30f8f8cSSatish Balay 
1415a30f8f8cSSatish Balay   PetscFunctionBegin;
14169566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14179566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
14183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1419a30f8f8cSSatish Balay }
1420a30f8f8cSSatish Balay 
1421d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1422d71ae5a4SJacob Faibussowitsch {
1423a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1424a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14253966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1426a30f8f8cSSatish Balay 
1427a30f8f8cSSatish Balay   PetscFunctionBegin;
1428d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
142926fbe8dcSKarl Rupp 
14309566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
143126fbe8dcSKarl Rupp 
14329371c9d4SSatish Balay   isend[0] = info->nz_used;
14339371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14349371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14359371c9d4SSatish Balay   isend[3] = info->memory;
14369371c9d4SSatish Balay   isend[4] = info->mallocs;
143726fbe8dcSKarl Rupp 
14389566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
143926fbe8dcSKarl Rupp 
14409371c9d4SSatish Balay   isend[0] += info->nz_used;
14419371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14429371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14439371c9d4SSatish Balay   isend[3] += info->memory;
14449371c9d4SSatish Balay   isend[4] += info->mallocs;
1445a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1446a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1447a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1448a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1449a30f8f8cSSatish Balay     info->memory       = isend[3];
1450a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1451a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14521c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
145326fbe8dcSKarl Rupp 
1454a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1455a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1456a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1457a30f8f8cSSatish Balay     info->memory       = irecv[3];
1458a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1459a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14601c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
146126fbe8dcSKarl Rupp 
1462a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1463a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1464a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1465a30f8f8cSSatish Balay     info->memory       = irecv[3];
1466a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
146798921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1468a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1469a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1470a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
14713ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1472a30f8f8cSSatish Balay }
1473a30f8f8cSSatish Balay 
1474d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1475d71ae5a4SJacob Faibussowitsch {
1476a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1477d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1478a30f8f8cSSatish Balay 
1479a30f8f8cSSatish Balay   PetscFunctionBegin;
1480e98b92d7SKris Buschelman   switch (op) {
1481512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1482e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148328b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1484a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1485c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1486e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
148743674050SBarry Smith     MatCheckPreallocated(A, 1);
14889566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14899566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1490e98b92d7SKris Buschelman     break;
1491e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
149243674050SBarry Smith     MatCheckPreallocated(A, 1);
14934e0d8c25SBarry Smith     a->roworiented = flg;
149426fbe8dcSKarl Rupp 
14959566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14969566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1497e98b92d7SKris Buschelman     break;
14988c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1499d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1500d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1501d71ae5a4SJacob Faibussowitsch     break;
1502d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1504d71ae5a4SJacob Faibussowitsch     break;
1505d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1506d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1507d71ae5a4SJacob Faibussowitsch     break;
1508d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1509d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1510d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15110f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1512eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1513547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1514eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1515eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1516eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1517b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1518eb1ec7c1SStefano Zampini     }
15190f2140c7SStefano Zampini #endif
1520eeffb40dSHong Zhang     break;
1521ffa07934SHong Zhang   case MAT_SPD:
1522d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1523d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1524d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1525eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1526eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1527eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1528eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1529eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1530eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1531eb1ec7c1SStefano Zampini     }
1532eb1ec7c1SStefano Zampini #endif
1533eeffb40dSHong Zhang     break;
153477e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
153543674050SBarry Smith     MatCheckPreallocated(A, 1);
15369566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1537eeffb40dSHong Zhang     break;
15389a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1539b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15405f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15419566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
154277e54ba9SKris Buschelman     break;
1543d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1544d71ae5a4SJacob Faibussowitsch     break;
1545d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1546d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1547d71ae5a4SJacob Faibussowitsch     break;
1548d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1549d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1550d71ae5a4SJacob Faibussowitsch     break;
1551d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1552d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1553d71ae5a4SJacob Faibussowitsch     break;
1554d71ae5a4SJacob Faibussowitsch   default:
1555d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1556a30f8f8cSSatish Balay   }
15573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1558a30f8f8cSSatish Balay }
1559a30f8f8cSSatish Balay 
1560d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1561d71ae5a4SJacob Faibussowitsch {
1562a30f8f8cSSatish Balay   PetscFunctionBegin;
15637fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1564cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15659566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1566cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15679566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1568fc4dec0aSBarry Smith   }
15693ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1570a30f8f8cSSatish Balay }
1571a30f8f8cSSatish Balay 
1572d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1573d71ae5a4SJacob Faibussowitsch {
1574a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1575a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15765e90f9d9SHong Zhang   PetscInt      nv, m, n;
1577ace3abfcSBarry Smith   PetscBool     flg;
1578a30f8f8cSSatish Balay 
1579a30f8f8cSSatish Balay   PetscFunctionBegin;
1580a30f8f8cSSatish Balay   if (ll != rr) {
15819566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15825f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1583a30f8f8cSSatish Balay   }
15843ba16761SJacob Faibussowitsch   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1585b3bf805bSHong Zhang 
15869566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15875f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1588b3bf805bSHong Zhang 
15899566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15905f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15915e90f9d9SHong Zhang 
15929566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15935e90f9d9SHong Zhang 
15945e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1595dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
15965e90f9d9SHong Zhang 
15975e90f9d9SHong Zhang   /* scale the diagonal part */
1598dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1599a30f8f8cSSatish Balay 
16005e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
16019566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
16033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1604a30f8f8cSSatish Balay }
1605a30f8f8cSSatish Balay 
1606d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1607d71ae5a4SJacob Faibussowitsch {
1608f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1609a30f8f8cSSatish Balay 
1610a30f8f8cSSatish Balay   PetscFunctionBegin;
16119566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
16123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1613a30f8f8cSSatish Balay }
1614a30f8f8cSSatish Balay 
16156849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1616a30f8f8cSSatish Balay 
1617d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1618d71ae5a4SJacob Faibussowitsch {
1619a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1620a30f8f8cSSatish Balay   Mat           a, b, c, d;
1621ace3abfcSBarry Smith   PetscBool     flg;
1622a30f8f8cSSatish Balay 
1623a30f8f8cSSatish Balay   PetscFunctionBegin;
16249371c9d4SSatish Balay   a = matA->A;
16259371c9d4SSatish Balay   b = matA->B;
16269371c9d4SSatish Balay   c = matB->A;
16279371c9d4SSatish Balay   d = matB->B;
1628a30f8f8cSSatish Balay 
16299566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
163048a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16311c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
16323ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1633a30f8f8cSSatish Balay }
1634a30f8f8cSSatish Balay 
1635d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1636d71ae5a4SJacob Faibussowitsch {
16374c7a3774SStefano Zampini   PetscBool isbaij;
16383c896bc6SHong Zhang 
16393c896bc6SHong Zhang   PetscFunctionBegin;
16409566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16415f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16423c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16433c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16449566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16459566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16469566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16473c896bc6SHong Zhang   } else {
16484c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16494c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16504c7a3774SStefano Zampini 
16519566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16529566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16533c896bc6SHong Zhang   }
16549566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16553ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16563c896bc6SHong Zhang }
16573c896bc6SHong Zhang 
1658d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659d71ae5a4SJacob Faibussowitsch {
16604fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16614fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16624fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16634fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16644fe895cdSHong Zhang 
16654fe895cdSHong Zhang   PetscFunctionBegin;
16664fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16674fe895cdSHong Zhang     PetscScalar alpha = a;
16684fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16694fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16709566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16724fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16734fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16749566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16769566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16789566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16799566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16809566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16814fe895cdSHong Zhang   } else {
16824de5dceeSHong Zhang     Mat       B;
16834de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16845f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16859566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16869566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16879566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16899566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16909566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16919566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16929566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16939566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
16949566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
16959566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
16969566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
16979566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
16989566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
16999566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17009566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17019566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17029566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17034fe895cdSHong Zhang   }
17043ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17054fe895cdSHong Zhang }
17064fe895cdSHong Zhang 
1707d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708d71ae5a4SJacob Faibussowitsch {
17091302d50aSBarry Smith   PetscInt  i;
1710afebec48SHong Zhang   PetscBool flg;
1711a5e6ed63SBarry Smith 
17126849ba73SBarry Smith   PetscFunctionBegin;
17139566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17159566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
171648a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17174dcd73b1SHong Zhang   }
17183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1719a5e6ed63SBarry Smith }
1720a5e6ed63SBarry Smith 
1721d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722d71ae5a4SJacob Faibussowitsch {
17237d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17246f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17257d68702bSBarry Smith 
17267d68702bSBarry Smith   PetscFunctionBegin;
17276f33a894SBarry Smith   if (!Y->preallocated) {
17289566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17296f33a894SBarry Smith   } else if (!aij->nz) {
1730b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17319566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732b83222d8SBarry Smith     aij->nonew = nonew;
17337d68702bSBarry Smith   }
17349566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17367d68702bSBarry Smith }
17377d68702bSBarry Smith 
1738d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739d71ae5a4SJacob Faibussowitsch {
17403b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17413b49f96aSBarry Smith 
17423b49f96aSBarry Smith   PetscFunctionBegin;
17435f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17449566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17453b49f96aSBarry Smith   if (d) {
17463b49f96aSBarry Smith     PetscInt rstart;
17479566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17483b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17493b49f96aSBarry Smith   }
17503ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17513b49f96aSBarry Smith }
17523b49f96aSBarry Smith 
1753d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754d71ae5a4SJacob Faibussowitsch {
1755a5b7ff6bSBarry Smith   PetscFunctionBegin;
1756a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
17573ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1758a5b7ff6bSBarry Smith }
17593b49f96aSBarry Smith 
17603964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1761a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1762a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1763a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
176497304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1765431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1766431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1767f4259b30SLisandro Dalcin                                        NULL,
1768f4259b30SLisandro Dalcin                                        NULL,
1769f4259b30SLisandro Dalcin                                        NULL,
1770f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1771f4259b30SLisandro Dalcin                                        NULL,
1772f4259b30SLisandro Dalcin                                        NULL,
177341f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1774a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
177597304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1776a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1777a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1778a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1779a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
178097304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1781a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1782a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1783a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1784f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1785f4259b30SLisandro Dalcin                                        NULL,
1786f4259b30SLisandro Dalcin                                        NULL,
1787f4259b30SLisandro Dalcin                                        NULL,
1788f4259b30SLisandro Dalcin                                        NULL,
178926cec326SBarry Smith                                        /* 29*/ MatSetUp_MPI_Hash,
1790f4259b30SLisandro Dalcin                                        NULL,
1791f4259b30SLisandro Dalcin                                        NULL,
1792a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1793f4259b30SLisandro Dalcin                                        NULL,
1794d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1795f4259b30SLisandro Dalcin                                        NULL,
1796f4259b30SLisandro Dalcin                                        NULL,
1797f4259b30SLisandro Dalcin                                        NULL,
1798f4259b30SLisandro Dalcin                                        NULL,
1799d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18007dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1801d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1802a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18033c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1804f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1805a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18067d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1807f4259b30SLisandro Dalcin                                        NULL,
1808f4259b30SLisandro Dalcin                                        NULL,
1809f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1810f4259b30SLisandro Dalcin                                        NULL,
1811f4259b30SLisandro Dalcin                                        NULL,
1812f4259b30SLisandro Dalcin                                        NULL,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1815f4259b30SLisandro Dalcin                                        NULL,
1816a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1817f4259b30SLisandro Dalcin                                        NULL,
1818a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18197dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1820f4259b30SLisandro Dalcin                                        NULL,
1821f4259b30SLisandro Dalcin                                        NULL,
1822f4259b30SLisandro Dalcin                                        NULL,
1823f4259b30SLisandro Dalcin                                        NULL,
1824f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1825f4259b30SLisandro Dalcin                                        NULL,
1826f4259b30SLisandro Dalcin                                        NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1830f4259b30SLisandro Dalcin                                        NULL,
183128d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1832f4259b30SLisandro Dalcin                                        NULL,
1833f4259b30SLisandro Dalcin                                        NULL,
1834f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1835f4259b30SLisandro Dalcin                                        NULL,
1836f4259b30SLisandro Dalcin                                        NULL,
1837f4259b30SLisandro Dalcin                                        NULL,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1840f4259b30SLisandro Dalcin                                        NULL,
1841f4259b30SLisandro Dalcin                                        NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
18435bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1844f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1845f4259b30SLisandro Dalcin                                        NULL,
1846f4259b30SLisandro Dalcin                                        NULL,
1847f4259b30SLisandro Dalcin                                        NULL,
1848f4259b30SLisandro Dalcin                                        NULL,
1849f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1850f4259b30SLisandro Dalcin                                        NULL,
1851f4259b30SLisandro Dalcin                                        NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1855f4259b30SLisandro Dalcin                                        NULL,
1856f4259b30SLisandro Dalcin                                        NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
1859f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1860f4259b30SLisandro Dalcin                                        NULL,
1861f4259b30SLisandro Dalcin                                        NULL,
18622726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1863f4259b30SLisandro Dalcin                                        NULL,
1864f4259b30SLisandro Dalcin                                        /*104*/ NULL,
186599cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1866d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1867d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
186895936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1869f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1870f4259b30SLisandro Dalcin                                        NULL,
1871f4259b30SLisandro Dalcin                                        NULL,
1872f4259b30SLisandro Dalcin                                        NULL,
18733b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1874f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1875f4259b30SLisandro Dalcin                                        NULL,
1876f4259b30SLisandro Dalcin                                        NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
1878f4259b30SLisandro Dalcin                                        NULL,
1879f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1880f4259b30SLisandro Dalcin                                        NULL,
1881f4259b30SLisandro Dalcin                                        NULL,
1882f4259b30SLisandro Dalcin                                        NULL,
1883f4259b30SLisandro Dalcin                                        NULL,
1884f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1885f4259b30SLisandro Dalcin                                        NULL,
1886f4259b30SLisandro Dalcin                                        NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
1889f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1890f4259b30SLisandro Dalcin                                        NULL,
1891f4259b30SLisandro Dalcin                                        NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1895f4259b30SLisandro Dalcin                                        NULL,
1896f4259b30SLisandro Dalcin                                        NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
189946533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1900f4259b30SLisandro Dalcin                                        NULL,
1901f4259b30SLisandro Dalcin                                        NULL,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1905d70f29a3SPierre Jolivet                                        NULL,
1906d70f29a3SPierre Jolivet                                        NULL,
190799a7f59eSMark Adams                                        NULL,
190899a7f59eSMark Adams                                        NULL,
19097fb60732SBarry Smith                                        NULL,
1910dec0b466SHong Zhang                                        /*150*/ NULL,
1911dec0b466SHong Zhang                                        NULL};
1912a30f8f8cSSatish Balay 
1913d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1914d71ae5a4SJacob Faibussowitsch {
1915476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1916535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19175d2a9ed1SStefano Zampini   PetscMPIInt   size;
1918a23d5eceSKris Buschelman 
1919a23d5eceSKris Buschelman   PetscFunctionBegin;
1920ad79cf63SBarry Smith   if (B->hash_active) {
1921ad79cf63SBarry Smith     PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
1922ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
1923ad79cf63SBarry Smith   }
1924ad79cf63SBarry Smith   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
19259566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19269566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19279566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19289566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19295f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19305f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1931899cda47SBarry Smith 
1932d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1933d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19345f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1935a23d5eceSKris Buschelman 
1936d0f46423SBarry Smith   B->rmap->bs = bs;
1937a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1938a23d5eceSKris Buschelman   b->mbs      = mbs;
1939a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1940de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1941de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1942a23d5eceSKris Buschelman 
1943ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1944d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1945d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1946a23d5eceSKris Buschelman 
1947d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1948d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1949a23d5eceSKris Buschelman 
1950cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
1951eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
1952cb7b82ddSBarry Smith #else
19539566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1954cb7b82ddSBarry Smith #endif
19559566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19569566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19579566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19589566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19599566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19609566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19619566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19629566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19639566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1964cb7b82ddSBarry Smith 
19659566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19669566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19679566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19689566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19699566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1970cb7b82ddSBarry Smith 
1971ad79cf63SBarry Smith   PetscCall(MatDestroy(&b->A));
19729566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19739566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19749566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1975a23d5eceSKris Buschelman 
19769566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19779566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
197826fbe8dcSKarl Rupp 
1979526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1980cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1981cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
19823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1983a23d5eceSKris Buschelman }
1984a23d5eceSKris Buschelman 
1985d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1986d71ae5a4SJacob Faibussowitsch {
198702106b30SBarry Smith   PetscInt        m, rstart, cend;
1988f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1989f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
1990f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
1991bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
19923bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
1993dfb205c3SBarry Smith 
1994dfb205c3SBarry Smith   PetscFunctionBegin;
19955f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
19969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
19979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
19989566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
20009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2001dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2002dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2003dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2004dfb205c3SBarry Smith 
20055f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2007dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2008dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20095f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20100cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2011dfb205c3SBarry Smith     JJ = jj + ii[i];
20120cd7f59aSBarry Smith     bd = 0;
2013dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20140cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2015dfb205c3SBarry Smith       JJ++;
20160cd7f59aSBarry Smith       bd++;
2017dfb205c3SBarry Smith     }
2018dfb205c3SBarry Smith     d = 0;
2019dfb205c3SBarry Smith     for (; j < nz; j++) {
2020dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2021dfb205c3SBarry Smith       d++;
2022dfb205c3SBarry Smith     }
2023dfb205c3SBarry Smith     d_nnz[i] = d;
20240cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20250cd7f59aSBarry Smith     nz       = nz - bd;
20260cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2027dfb205c3SBarry Smith   }
20289566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20299566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20309566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2031dfb205c3SBarry Smith 
2032dfb205c3SBarry Smith   values = (PetscScalar *)V;
203348a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2034dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2035dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2036dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2037dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2038bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2039dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20409566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2041bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2042bb80cfbbSStefano Zampini       PetscInt j;
20430cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20440cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20459566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20460cd7f59aSBarry Smith       }
20470cd7f59aSBarry Smith     }
2048dfb205c3SBarry Smith   }
2049dfb205c3SBarry Smith 
20509566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20513bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20523bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20549566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20553bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20563bd0feecSPierre Jolivet 
20579566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
20583ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2059dfb205c3SBarry Smith }
2060dfb205c3SBarry Smith 
20610bad9183SKris Buschelman /*MC
2062fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2063828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2064828413b8SBarry Smith    the matrix is stored.
2065828413b8SBarry Smith 
2066828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
206711a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20680bad9183SKris Buschelman 
20692ef1f0ffSBarry Smith    Options Database Key:
207011a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20710bad9183SKris Buschelman 
20722ef1f0ffSBarry Smith    Level: beginner
20732ef1f0ffSBarry Smith 
207411a5261eSBarry Smith    Note:
2075476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2076476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2077476417e5SBarry Smith 
2078*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20790bad9183SKris Buschelman M*/
20800bad9183SKris Buschelman 
2081d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2082d71ae5a4SJacob Faibussowitsch {
2083b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
208494ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2085b5df2d14SHong Zhang 
2086b5df2d14SHong Zhang   PetscFunctionBegin;
20874dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2088b0a32e0cSBarry Smith   B->data = (void *)b;
20899566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2090b5df2d14SHong Zhang 
2091b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2092b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2093b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2094b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
209526fbe8dcSKarl Rupp 
20969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
20979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2098b5df2d14SHong Zhang 
2099b5df2d14SHong Zhang   /* build local table of row and column ownerships */
21009566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2101b5df2d14SHong Zhang 
2102b5df2d14SHong Zhang   /* build cache for off array entries formed */
21039566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
210426fbe8dcSKarl Rupp 
2105b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21060298fd71SBarry Smith   b->colmap      = NULL;
21070298fd71SBarry Smith   b->garray      = NULL;
2108b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2109b5df2d14SHong Zhang 
2110b5df2d14SHong Zhang   /* stuff used in block assembly */
2111f4259b30SLisandro Dalcin   b->barray = NULL;
2112b5df2d14SHong Zhang 
2113b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2114f4259b30SLisandro Dalcin   b->lvec    = NULL;
2115f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2116f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2117f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2118f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2119f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2120f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2121f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2122b5df2d14SHong Zhang 
2123b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2124f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2125f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2126b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2127b5df2d14SHong Zhang 
2128b5df2d14SHong Zhang   /* hash table stuff */
2129f4259b30SLisandro Dalcin   b->ht           = NULL;
2130f4259b30SLisandro Dalcin   b->hd           = NULL;
2131b5df2d14SHong Zhang   b->ht_size      = 0;
2132b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2133b5df2d14SHong Zhang   b->ht_fact      = 0;
2134b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2135b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2136b5df2d14SHong Zhang 
21377dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21387a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21397a868f3eSHong Zhang 
2140f4259b30SLisandro Dalcin   b->in_loc = NULL;
2141f4259b30SLisandro Dalcin   b->v_loc  = NULL;
214259ffdab8SBarry Smith   b->n_loc  = 0;
214394ae4db5SBarry Smith 
21449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21486214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21506214f412SHong Zhang #endif
2151d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2153d24d4204SJose E. Roman #endif
21549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2156aa5a9175SDahai Guo 
2157b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2158b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2159b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2160b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2161eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2162b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2163eb1ec7c1SStefano Zampini #else
2164b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2165eb1ec7c1SStefano Zampini #endif
216613647f61SHong Zhang 
21679566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2168d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21699566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
217094ae4db5SBarry Smith   if (flg) {
217194ae4db5SBarry Smith     PetscReal fact = 1.39;
21729566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21739566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
217494ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21759566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21769566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
217794ae4db5SBarry Smith   }
2178d0609cedSBarry Smith   PetscOptionsEnd();
21793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2180b5df2d14SHong Zhang }
2181b5df2d14SHong Zhang 
2182209238afSKris Buschelman /*MC
2183002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2184209238afSKris Buschelman 
218511a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
218611a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2187209238afSKris Buschelman 
218811a5261eSBarry Smith    Options Database Key:
2189c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2190209238afSKris Buschelman 
2191209238afSKris Buschelman   Level: beginner
2192209238afSKris Buschelman 
2193*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2194209238afSKris Buschelman M*/
2195209238afSKris Buschelman 
2196b5df2d14SHong Zhang /*@C
2197b5df2d14SHong Zhang    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2198b5df2d14SHong Zhang    the user should preallocate the matrix storage by setting the parameters
2199b5df2d14SHong Zhang    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2200b5df2d14SHong Zhang    performance can be increased by more than a factor of 50.
2201b5df2d14SHong Zhang 
2202c3339decSBarry Smith    Collective
2203b5df2d14SHong Zhang 
2204b5df2d14SHong Zhang    Input Parameters:
22051c4f3114SJed Brown +  B - the matrix
2206bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2207bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2208b5df2d14SHong Zhang .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2209b5df2d14SHong Zhang            submatrix  (same for all local rows)
2210b5df2d14SHong Zhang .  d_nnz - array containing the number of block nonzeros in the various block rows
22116d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22122ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
221395742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2214b5df2d14SHong Zhang .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2215b5df2d14SHong Zhang            submatrix (same for all local rows).
2216b5df2d14SHong Zhang -  o_nnz - array containing the number of nonzeros in the various block rows of the
2217c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22182ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.
2219b5df2d14SHong Zhang 
2220b5df2d14SHong Zhang    Options Database Keys:
2221a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2222b5df2d14SHong Zhang                      block calculations (much slower)
2223a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
2224b5df2d14SHong Zhang 
22252ef1f0ffSBarry Smith    Level: intermediate
22262ef1f0ffSBarry Smith 
2227b5df2d14SHong Zhang    Notes:
2228b5df2d14SHong Zhang 
222911a5261eSBarry Smith    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2230b5df2d14SHong Zhang    than it must be used on all processors that share the object for that argument.
2231b5df2d14SHong Zhang 
223249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
223349a6f317SBarry Smith 
2234b5df2d14SHong Zhang    Storage Information:
2235b5df2d14SHong Zhang    For a square global matrix we define each processor's diagonal portion
2236b5df2d14SHong Zhang    to be its local rows and the corresponding columns (a square submatrix);
2237b5df2d14SHong Zhang    each processor's off-diagonal portion encompasses the remainder of the
2238b5df2d14SHong Zhang    local matrix (a rectangular submatrix).
2239b5df2d14SHong Zhang 
2240b5df2d14SHong Zhang    The user can specify preallocated storage for the diagonal part of
22412ef1f0ffSBarry Smith    the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
22422ef1f0ffSBarry Smith    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2243b5df2d14SHong Zhang    memory allocation.  Likewise, specify preallocated storage for the
22442ef1f0ffSBarry Smith    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2245b5df2d14SHong Zhang 
224611a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2247aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
22482ef1f0ffSBarry Smith    You can also run with the option `-info` and look for messages with the string
2249aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2250aa95bbe8SBarry Smith 
2251b5df2d14SHong Zhang    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2252b5df2d14SHong Zhang    the figure below we depict these three local rows and all columns (0-11).
2253b5df2d14SHong Zhang 
2254b5df2d14SHong Zhang .vb
2255b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2256a4b1a0f6SJed Brown           --------------------------
2257c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2258c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2259c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2260a4b1a0f6SJed Brown           --------------------------
2261b5df2d14SHong Zhang .ve
2262b5df2d14SHong Zhang 
2263b5df2d14SHong Zhang    Thus, any entries in the d locations are stored in the d (diagonal)
2264b5df2d14SHong Zhang    submatrix, and any entries in the o locations are stored in the
22656d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
226611a5261eSBarry Smith    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2267b5df2d14SHong Zhang 
22682ef1f0ffSBarry Smith    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
22696d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
22702ef1f0ffSBarry Smith    and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2271c2fc9fa9SBarry Smith 
2272b5df2d14SHong Zhang    In general, for PDE problems in which most nonzeros are near the diagonal,
22732ef1f0ffSBarry Smith    one expects `d_nz` >> `o_nz`.
2274b5df2d14SHong Zhang 
2275*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2276b5df2d14SHong Zhang @*/
2277d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2278d71ae5a4SJacob Faibussowitsch {
2279b5df2d14SHong Zhang   PetscFunctionBegin;
22806ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22816ba663aaSJed Brown   PetscValidType(B, 1);
22826ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2283cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
22843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2285b5df2d14SHong Zhang }
2286b5df2d14SHong Zhang 
2287a30f8f8cSSatish Balay /*@C
228811a5261eSBarry Smith    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2289a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2290a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
229120f4b53cSBarry Smith    `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2292a30f8f8cSSatish Balay 
2293d083f849SBarry Smith    Collective
2294a30f8f8cSSatish Balay 
2295a30f8f8cSSatish Balay    Input Parameters:
2296a30f8f8cSSatish Balay +  comm - MPI communicator
229711a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
229820f4b53cSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
229920f4b53cSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2300a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2301a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
230220f4b53cSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2303a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2304a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
230520f4b53cSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
230620f4b53cSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2307a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2308a30f8f8cSSatish Balay            submatrix (same for all local rows)
2309a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
23106d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23112ef1f0ffSBarry Smith            (possibly different for each block block row) or `NULL`.
231295742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
231395742e49SBarry Smith            set its value even if it is zero.
2314a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2315a30f8f8cSSatish Balay            submatrix (same for all local rows).
2316a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2317a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23182ef1f0ffSBarry Smith            each block row) or `NULL`.
2319a30f8f8cSSatish Balay 
2320a30f8f8cSSatish Balay    Output Parameter:
2321a30f8f8cSSatish Balay .  A - the matrix
2322a30f8f8cSSatish Balay 
2323a30f8f8cSSatish Balay    Options Database Keys:
2324a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2325a30f8f8cSSatish Balay                      block calculations (much slower)
2326a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2327a2b725a8SWilliam Gropp -   -mat_mpi - use the parallel matrix data structures even on one processor
2328a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2329a30f8f8cSSatish Balay 
23302ef1f0ffSBarry Smith    Level: intermediate
23312ef1f0ffSBarry Smith 
23322ef1f0ffSBarry Smith    Notes:
233311a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2334f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
233511a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2336175b88e8SBarry Smith 
2337d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
23386d6d819aSHong Zhang    This matrix type does not support complex Hermitian operation.
2339d1be2dadSMatthew Knepley 
2340a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2341a30f8f8cSSatish Balay    (possibly both).
2342a30f8f8cSSatish Balay 
234311a5261eSBarry Smith    If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2345a30f8f8cSSatish Balay 
234649a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
234749a6f317SBarry Smith 
2348a30f8f8cSSatish Balay    Storage Information:
2349a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2350a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2351a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2352a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2353a30f8f8cSSatish Balay 
2354a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
23552ef1f0ffSBarry Smith    the local submatrix with either `d_nz` or `d_nnz` (not both). Set
23562ef1f0ffSBarry Smith    `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2357a30f8f8cSSatish Balay    memory allocation. Likewise, specify preallocated storage for the
23582ef1f0ffSBarry Smith    off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2359a30f8f8cSSatish Balay 
2360a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2362a30f8f8cSSatish Balay 
2363a30f8f8cSSatish Balay .vb
2364a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2365a4b1a0f6SJed Brown           --------------------------
2366c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2367c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2368c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2369a4b1a0f6SJed Brown           --------------------------
2370a30f8f8cSSatish Balay .ve
2371a30f8f8cSSatish Balay 
2372a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2373a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
23746d10fdaeSSatish Balay    o (off-diagonal) submatrix. Note that the d matrix is stored in
23752ef1f0ffSBarry Smith    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2376a30f8f8cSSatish Balay 
23772ef1f0ffSBarry Smith    Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
23786d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
23792ef1f0ffSBarry Smith    and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2380a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
23812ef1f0ffSBarry Smith    one expects `d_nz` >> `o_nz`.
2382a30f8f8cSSatish Balay 
2383*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2384a30f8f8cSSatish Balay @*/
2385a30f8f8cSSatish Balay 
2386d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2387d71ae5a4SJacob Faibussowitsch {
23881302d50aSBarry Smith   PetscMPIInt size;
2389a30f8f8cSSatish Balay 
2390a30f8f8cSSatish Balay   PetscFunctionBegin;
23919566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23929566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23939566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2394273d9f13SBarry Smith   if (size > 1) {
23959566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
23969566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2397273d9f13SBarry Smith   } else {
23989566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
23999566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2400273d9f13SBarry Smith   }
24013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2402a30f8f8cSSatish Balay }
2403a30f8f8cSSatish Balay 
2404d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2405d71ae5a4SJacob Faibussowitsch {
2406a30f8f8cSSatish Balay   Mat           mat;
2407a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2408d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2409387bc808SHong Zhang   PetscScalar  *array;
2410a30f8f8cSSatish Balay 
2411a30f8f8cSSatish Balay   PetscFunctionBegin;
2412f4259b30SLisandro Dalcin   *newmat = NULL;
241326fbe8dcSKarl Rupp 
24149566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24159566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24169566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24179566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24189566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2419e1b6402fSHong Zhang 
2420d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2421273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
242282327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24237fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24247fff6886SHong Zhang 
2425b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2426a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2427a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2428a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2429a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2430a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2431a30f8f8cSSatish Balay 
2432a30f8f8cSSatish Balay   a->size         = oldmat->size;
2433a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2434a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2435a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2436f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2437f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2438a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2439f4259b30SLisandro Dalcin   a->barray       = NULL;
2440899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2441899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2442899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2443899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2444a30f8f8cSSatish Balay 
2445a30f8f8cSSatish Balay   /* hash table stuff */
2446f4259b30SLisandro Dalcin   a->ht           = NULL;
2447f4259b30SLisandro Dalcin   a->hd           = NULL;
2448a30f8f8cSSatish Balay   a->ht_size      = 0;
2449a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2450a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2451a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2452a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2453a30f8f8cSSatish Balay 
24549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2455a30f8f8cSSatish Balay   if (oldmat->colmap) {
2456a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2457eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2458a30f8f8cSSatish Balay #else
24599566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24609566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2461a30f8f8cSSatish Balay #endif
2462f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2463387bc808SHong Zhang 
2464a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24659566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24669566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2467f4259b30SLisandro Dalcin   } else a->garray = NULL;
2468a30f8f8cSSatish Balay 
24699566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24709566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24719566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
247282327fa8SHong Zhang 
24739566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24749566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2475387bc808SHong Zhang 
24769566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24779566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24789566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24799566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24809566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24829566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
24839566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2484387bc808SHong Zhang 
2485387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
24869566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2487387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
248882327fa8SHong Zhang 
24899566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
24909566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
24919566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2492a30f8f8cSSatish Balay   *newmat = mat;
24933ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2494a30f8f8cSSatish Balay }
2495a30f8f8cSSatish Balay 
2496618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2497618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2498618cc2edSLisandro Dalcin 
2499d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2500d71ae5a4SJacob Faibussowitsch {
25017f489da9SVaclav Hapla   PetscBool isbinary;
250295936485SShri Abhyankar 
250395936485SShri Abhyankar   PetscFunctionBegin;
25049566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25055f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25069566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
25073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
250895936485SShri Abhyankar }
250995936485SShri Abhyankar 
2510d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2511d71ae5a4SJacob Faibussowitsch {
251224d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2513f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2514ca54ac64SHong Zhang   PetscReal     atmp;
251587828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25161302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25171302d50aSBarry Smith   PetscMPIInt   rank, size;
25181302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
251987828ca2SBarry Smith   PetscScalar  *va;
25208a1c53f2SBarry Smith   MatScalar    *ba;
2521f4c0e9e4SHong Zhang   MPI_Status    stat;
252224d5174aSHong Zhang 
252324d5174aSHong Zhang   PetscFunctionBegin;
25245f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25259566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25269566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2527f4c0e9e4SHong Zhang 
25289566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25299566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2530f4c0e9e4SHong Zhang 
2531d0f46423SBarry Smith   bs  = A->rmap->bs;
2532f4c0e9e4SHong Zhang   mbs = a->mbs;
2533f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2534f4c0e9e4SHong Zhang   ba  = b->a;
2535f4c0e9e4SHong Zhang   bi  = b->i;
2536f4c0e9e4SHong Zhang   bj  = b->j;
2537f4c0e9e4SHong Zhang 
2538f4c0e9e4SHong Zhang   /* find ownerships */
2539d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2540f4c0e9e4SHong Zhang 
2541f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25429566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2543f4c0e9e4SHong Zhang 
2544f4c0e9e4SHong Zhang   /* row_max for B */
2545b8475685SHong Zhang   if (rank != size - 1) {
2546f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25479371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25489371c9d4SSatish Balay       bi++;
2549f4c0e9e4SHong Zhang       brow = bs * i;
2550f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2551f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2552f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2553ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
255404d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2555f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25569371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25579371c9d4SSatish Balay             ba++;
2558ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2559ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2560f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2561f4c0e9e4SHong Zhang           }
2562f4c0e9e4SHong Zhang         }
2563f4c0e9e4SHong Zhang         bj++;
2564f4c0e9e4SHong Zhang       }
2565f4c0e9e4SHong Zhang     }
2566f4c0e9e4SHong Zhang 
2567f4c0e9e4SHong Zhang     /* send values to its owners */
2568f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2569f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2570ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25719566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2572ca54ac64SHong Zhang     }
2573f4c0e9e4SHong Zhang   }
2574f4c0e9e4SHong Zhang 
2575f4c0e9e4SHong Zhang   /* receive values */
2576ca54ac64SHong Zhang   if (rank) {
2577f4c0e9e4SHong Zhang     rvalues = work;
2578ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2579f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25809566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2581f4c0e9e4SHong Zhang       /* process values */
2582f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2583ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2584f4c0e9e4SHong Zhang       }
2585f4c0e9e4SHong Zhang     }
2586ca54ac64SHong Zhang   }
2587f4c0e9e4SHong Zhang 
25889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
25899566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
25903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259124d5174aSHong Zhang }
25922798e883SHong Zhang 
2593d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2594d71ae5a4SJacob Faibussowitsch {
25952798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2596d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
25973649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2598ffe4fb16SHong Zhang   Vec                bb1;
25993649974fSBarry Smith   const PetscScalar *b;
2600ffe4fb16SHong Zhang 
2601ffe4fb16SHong Zhang   PetscFunctionBegin;
26025f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26035f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2604ffe4fb16SHong Zhang 
2605a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26069566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
26073ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2608a2b30743SBarry Smith   }
2609a2b30743SBarry Smith 
2610ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2611ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26129566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2613ffe4fb16SHong Zhang       its--;
2614ffe4fb16SHong Zhang     }
2615ffe4fb16SHong Zhang 
26169566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2617ffe4fb16SHong Zhang     while (its--) {
2618ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26199566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2620ffe4fb16SHong Zhang 
2621ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26229566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26239566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26249566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26259566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2626ffe4fb16SHong Zhang 
26279566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2628ffe4fb16SHong Zhang 
2629ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26309566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26319566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26329566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26339566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2634ffe4fb16SHong Zhang 
2635ffe4fb16SHong Zhang       /* set slvec1b = 0 */
26369566063dSJacob Faibussowitsch       PetscCall(VecSet(mat->slvec1b, 0.0));
2637ffe4fb16SHong Zhang 
26389566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26399566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26409566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26419566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2642ffe4fb16SHong Zhang 
2643ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26449566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2645ffe4fb16SHong Zhang 
2646ffe4fb16SHong Zhang       /* local diagonal sweep */
26479566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2648ffe4fb16SHong Zhang     }
26499566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2650fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26519566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2652fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26539566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2654fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2655fa22f6d0SBarry Smith     Vec                xx1;
2656ace3abfcSBarry Smith     PetscBool          hasop;
265720f1ed55SBarry Smith     const PetscScalar *diag;
2658887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
265920f1ed55SBarry Smith     PetscInt           i, n;
2660fa22f6d0SBarry Smith 
2661fa22f6d0SBarry Smith     if (!mat->xx1) {
26629566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26639566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2664fa22f6d0SBarry Smith     }
2665fa22f6d0SBarry Smith     xx1 = mat->xx1;
2666fa22f6d0SBarry Smith     bb1 = mat->bb1;
2667fa22f6d0SBarry Smith 
26689566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2669fa22f6d0SBarry Smith 
2670fa22f6d0SBarry Smith     if (!mat->diag) {
2671effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26729566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26739566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2674fa22f6d0SBarry Smith     }
26759566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2676fa22f6d0SBarry Smith 
2677fa22f6d0SBarry Smith     if (hasop) {
26789566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26799566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
268020f1ed55SBarry Smith     } else {
268120f1ed55SBarry Smith       /*
268220f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
26839566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
26849566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
268520f1ed55SBarry Smith       */
26869566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
26879566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
26889566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26899566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26909566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2691887ee2caSBarry Smith       if (omega == 1.0) {
269226fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
26939566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2694887ee2caSBarry Smith       } else {
269526fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
26969566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2697887ee2caSBarry Smith       }
26989566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
26999566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27009566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27019566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
270220f1ed55SBarry Smith     }
2703fa22f6d0SBarry Smith 
2704fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
27059566063dSJacob Faibussowitsch     PetscCall(VecSet(mat->slvec1b, 0.0));
27069566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27079566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27089566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27099566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27109566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27119566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27129566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27139566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27149566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2715fa22f6d0SBarry Smith 
2716fa22f6d0SBarry Smith     /* local sweep */
27179566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27189566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2719f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
27203ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2721ffe4fb16SHong Zhang }
2722ffe4fb16SHong Zhang 
2723dfb205c3SBarry Smith /*@
272411a5261eSBarry Smith      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2725dfb205c3SBarry Smith          CSR format the local rows.
2726dfb205c3SBarry Smith 
2727d083f849SBarry Smith    Collective
2728dfb205c3SBarry Smith 
2729dfb205c3SBarry Smith    Input Parameters:
2730dfb205c3SBarry Smith +  comm - MPI communicator
2731dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
273211a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2733dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
273411a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
27352ef1f0ffSBarry Smith        calculated if `N` is given) For square matrices `n` is almost always `m`.
27362ef1f0ffSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
27372ef1f0ffSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2738483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2739dfb205c3SBarry Smith .   j - column indices
2740dfb205c3SBarry Smith -   a - matrix values
2741dfb205c3SBarry Smith 
2742dfb205c3SBarry Smith    Output Parameter:
2743dfb205c3SBarry Smith .   mat - the matrix
2744dfb205c3SBarry Smith 
2745dfb205c3SBarry Smith    Level: intermediate
2746dfb205c3SBarry Smith 
2747dfb205c3SBarry Smith    Notes:
27482ef1f0ffSBarry Smith        The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
27492ef1f0ffSBarry Smith      thus you CANNOT change the matrix entries by changing the values of `a` after you have
27502ef1f0ffSBarry Smith      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2751dfb205c3SBarry Smith 
27522ef1f0ffSBarry Smith        The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2753dfb205c3SBarry Smith 
2754*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2755db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2756dfb205c3SBarry Smith @*/
2757d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2758d71ae5a4SJacob Faibussowitsch {
2759dfb205c3SBarry Smith   PetscFunctionBegin;
27605f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27615f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27629566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27639566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27649566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27659566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
27663ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2767dfb205c3SBarry Smith }
2768dfb205c3SBarry Smith 
2769dfb205c3SBarry Smith /*@C
277011a5261eSBarry Smith    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2771dfb205c3SBarry Smith 
2772d083f849SBarry Smith    Collective
2773dfb205c3SBarry Smith 
2774dfb205c3SBarry Smith    Input Parameters:
27751c4f3114SJed Brown +  B - the matrix
2776dfb205c3SBarry Smith .  bs - the block size
27772ef1f0ffSBarry Smith .  i - the indices into `j` for the start of each local row (starts with zero)
2778dfb205c3SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2779dfb205c3SBarry Smith -  v - optional values in the matrix
2780dfb205c3SBarry Smith 
2781664954b6SBarry Smith    Level: advanced
2782664954b6SBarry Smith 
2783664954b6SBarry Smith    Notes:
27840cd7f59aSBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
27850cd7f59aSBarry Smith    and usually the numerical values as well
27860cd7f59aSBarry Smith 
278750c5228eSBarry Smith    Any entries below the diagonal are ignored
2788dfb205c3SBarry Smith 
2789*1cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2790dfb205c3SBarry Smith @*/
2791d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2792d71ae5a4SJacob Faibussowitsch {
2793dfb205c3SBarry Smith   PetscFunctionBegin;
2794cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
27953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2796dfb205c3SBarry Smith }
2797dfb205c3SBarry Smith 
2798d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2799d71ae5a4SJacob Faibussowitsch {
280010c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
280110c56fdeSHong Zhang   PetscInt    *indx;
280210c56fdeSHong Zhang   PetscScalar *values;
2803dfb205c3SBarry Smith 
28044dcd73b1SHong Zhang   PetscFunctionBegin;
28059566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
280610c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
280710c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2808de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
280910c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2810de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28114dcd73b1SHong Zhang 
28129566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28139371c9d4SSatish Balay     mbs = m / bs;
28149371c9d4SSatish Balay     Nbs = N / cbs;
281548a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2816da91a574SPierre Jolivet     nbs = n / cbs;
28174dcd73b1SHong Zhang 
28189566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2819d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2820de25e9cbSPierre Jolivet 
28219566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28229566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2823de25e9cbSPierre Jolivet     if (rank == size - 1) {
2824de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28255f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2826de25e9cbSPierre Jolivet     }
2827de25e9cbSPierre Jolivet 
2828d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28299566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
283010c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28319566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28324dcd73b1SHong Zhang       nnz = nnz / bs;
28334dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28349566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28359566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28364dcd73b1SHong Zhang     }
28379566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28389566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28394dcd73b1SHong Zhang 
28409566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28419566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28429566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28439566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28449566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28459566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2846d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28474dcd73b1SHong Zhang   }
28484dcd73b1SHong Zhang 
284910c56fdeSHong Zhang   /* numeric phase */
28509566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28519566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28524dcd73b1SHong Zhang 
28539566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28544dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28559566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28564dcd73b1SHong Zhang     Ii = i + rstart;
28579566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28589566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28594dcd73b1SHong Zhang   }
28609566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28629566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28644dcd73b1SHong Zhang }
2865