xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 17ea310b7b37a5c1e54af8b7921a3ef221a1e68c)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
5a30f8f8cSSatish Balay 
626cec326SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
726cec326SBarry Smith {
826cec326SBarry Smith   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
926cec326SBarry Smith 
1026cec326SBarry Smith   PetscFunctionBegin;
1126cec326SBarry Smith #if defined(PETSC_USE_LOG)
1226cec326SBarry Smith   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
1326cec326SBarry Smith #endif
1426cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->stash));
1526cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->bstash));
1626cec326SBarry Smith   PetscCall(MatDestroy(&baij->A));
1726cec326SBarry Smith   PetscCall(MatDestroy(&baij->B));
1826cec326SBarry Smith #if defined(PETSC_USE_CTABLE)
1926cec326SBarry Smith   PetscCall(PetscHMapIDestroy(&baij->colmap));
2026cec326SBarry Smith #else
2126cec326SBarry Smith   PetscCall(PetscFree(baij->colmap));
2226cec326SBarry Smith #endif
2326cec326SBarry Smith   PetscCall(PetscFree(baij->garray));
2426cec326SBarry Smith   PetscCall(VecDestroy(&baij->lvec));
2526cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->Mvctx));
2626cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0));
2726cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0b));
2826cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1));
2926cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1a));
3026cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1b));
3126cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->sMvctx));
3226cec326SBarry Smith   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
3326cec326SBarry Smith   PetscCall(PetscFree(baij->barray));
3426cec326SBarry Smith   PetscCall(PetscFree(baij->hd));
3526cec326SBarry Smith   PetscCall(VecDestroy(&baij->diag));
3626cec326SBarry Smith   PetscCall(VecDestroy(&baij->bb1));
3726cec326SBarry Smith   PetscCall(VecDestroy(&baij->xx1));
3826cec326SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
3926cec326SBarry Smith   PetscCall(PetscFree(baij->setvaluescopy));
4026cec326SBarry Smith #endif
4126cec326SBarry Smith   PetscCall(PetscFree(baij->in_loc));
4226cec326SBarry Smith   PetscCall(PetscFree(baij->v_loc));
4326cec326SBarry Smith   PetscCall(PetscFree(baij->rangebs));
4426cec326SBarry Smith   PetscCall(PetscFree(mat->data));
4526cec326SBarry Smith 
4626cec326SBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
4726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
4826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
4926cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
5026cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
5126cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL)
5226cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
5326cec326SBarry Smith #endif
5426cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK)
5526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
5626cec326SBarry Smith #endif
5726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
5826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
5926cec326SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
6026cec326SBarry Smith }
6126cec326SBarry Smith 
6226cec326SBarry Smith /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
6326cec326SBarry Smith #define TYPE SBAIJ
6426cec326SBarry Smith #define TYPE_SBAIJ
6526cec326SBarry Smith #include "../src/mat/impls/aij/mpi/mpihashmat.h"
6626cec326SBarry Smith #undef TYPE
6726cec326SBarry Smith #undef TYPE_SBAIJ
6826cec326SBarry Smith 
696214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
70cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
716214f412SHong Zhang #endif
72d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
73d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74d24d4204SJose E. Roman #endif
75b147fbf3SStefano Zampini 
76b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
77d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78d71ae5a4SJacob Faibussowitsch {
79b147fbf3SStefano Zampini   Mat       preallocator;
80b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
81b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
82b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
83b147fbf3SStefano Zampini 
84b147fbf3SStefano Zampini   PetscFunctionBegin;
85b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
87b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
88b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
905f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91b147fbf3SStefano Zampini   }
92b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
939566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
949566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
959566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
969566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
979566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
1009566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
1019566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
103b147fbf3SStefano Zampini     PetscInt           ncols;
104b147fbf3SStefano Zampini     const PetscInt    *row;
105b147fbf3SStefano Zampini     const PetscScalar *vals;
106b147fbf3SStefano Zampini 
107b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
1089566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
1099566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
11048a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
1119566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112b147fbf3SStefano Zampini     }
113b147fbf3SStefano Zampini   }
1149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
1159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
1169566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
1179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
1183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119b147fbf3SStefano Zampini }
120b147fbf3SStefano Zampini 
121d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122d71ae5a4SJacob Faibussowitsch {
123b147fbf3SStefano Zampini   Mat      B;
124b147fbf3SStefano Zampini   PetscInt r;
125b147fbf3SStefano Zampini 
126b147fbf3SStefano Zampini   PetscFunctionBegin;
127b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
12828d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
129b147fbf3SStefano Zampini     PetscInt  bs;
130b147fbf3SStefano Zampini 
1319566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1329566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1339566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
1349566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
1359566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
1369566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
1379566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
1389566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
13928d58a37SPierre Jolivet     if (!isdense) {
1409566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
1419566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
1429566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
14328d58a37SPierre Jolivet     } else {
1449566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
14528d58a37SPierre Jolivet     }
14628d58a37SPierre Jolivet   } else {
14728d58a37SPierre Jolivet     B = *newmat;
1489566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
14928d58a37SPierre Jolivet   }
150b147fbf3SStefano Zampini 
1519566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
152b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153b147fbf3SStefano Zampini     PetscInt           ncols;
154b147fbf3SStefano Zampini     const PetscInt    *row;
155b147fbf3SStefano Zampini     const PetscScalar *vals;
156b147fbf3SStefano Zampini 
1579566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
1589566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
160b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
161eb1ec7c1SStefano Zampini       PetscInt i;
16248a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163eb1ec7c1SStefano Zampini     } else {
1649566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165eb1ec7c1SStefano Zampini     }
166eb1ec7c1SStefano Zampini #else
1679566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168eb1ec7c1SStefano Zampini #endif
1699566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170b147fbf3SStefano Zampini   }
1719566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174b147fbf3SStefano Zampini 
175b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1769566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
177b147fbf3SStefano Zampini   } else {
178b147fbf3SStefano Zampini     *newmat = B;
179b147fbf3SStefano Zampini   }
1803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181b147fbf3SStefano Zampini }
182b147fbf3SStefano Zampini 
183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184d71ae5a4SJacob Faibussowitsch {
185f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186a30f8f8cSSatish Balay 
187a30f8f8cSSatish Balay   PetscFunctionBegin;
1889566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1899566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191a30f8f8cSSatish Balay }
192a30f8f8cSSatish Balay 
193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194d71ae5a4SJacob Faibussowitsch {
195f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196a30f8f8cSSatish Balay 
197a30f8f8cSSatish Balay   PetscFunctionBegin;
1989566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1999566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
2003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
201a30f8f8cSSatish Balay }
202a30f8f8cSSatish Balay 
203d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204a8f51744SPierre Jolivet   do { \
205a30f8f8cSSatish Balay     brow = row / bs; \
2069371c9d4SSatish Balay     rp   = aj + ai[brow]; \
2079371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
2089371c9d4SSatish Balay     rmax = aimax[brow]; \
2099371c9d4SSatish Balay     nrow = ailen[brow]; \
210a30f8f8cSSatish Balay     bcol = col / bs; \
2119371c9d4SSatish Balay     ridx = row % bs; \
2129371c9d4SSatish Balay     cidx = col % bs; \
2139371c9d4SSatish Balay     low  = 0; \
2149371c9d4SSatish Balay     high = nrow; \
215a30f8f8cSSatish Balay     while (high - low > 3) { \
216a30f8f8cSSatish Balay       t = (low + high) / 2; \
217a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
218a30f8f8cSSatish Balay       else low = t; \
219a30f8f8cSSatish Balay     } \
220a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
221a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
222a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
223a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
224a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
225a30f8f8cSSatish Balay         else *bap = value; \
226a30f8f8cSSatish Balay         goto a_noinsert; \
227a30f8f8cSSatish Balay       } \
228a30f8f8cSSatish Balay     } \
229a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
2305f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232a30f8f8cSSatish Balay     N = nrow++ - 1; \
233a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2349566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2359566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2369566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
238a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
239e56f5c9eSBarry Smith     A->nonzerostate++; \
240a30f8f8cSSatish Balay   a_noinsert:; \
241a30f8f8cSSatish Balay     ailen[brow] = nrow; \
242a8f51744SPierre Jolivet   } while (0)
243e5e170daSBarry Smith 
244d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245a8f51744SPierre Jolivet   do { \
246a30f8f8cSSatish Balay     brow = row / bs; \
2479371c9d4SSatish Balay     rp   = bj + bi[brow]; \
2489371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
2499371c9d4SSatish Balay     rmax = bimax[brow]; \
2509371c9d4SSatish Balay     nrow = bilen[brow]; \
251a30f8f8cSSatish Balay     bcol = col / bs; \
2529371c9d4SSatish Balay     ridx = row % bs; \
2539371c9d4SSatish Balay     cidx = col % bs; \
2549371c9d4SSatish Balay     low  = 0; \
2559371c9d4SSatish Balay     high = nrow; \
256a30f8f8cSSatish Balay     while (high - low > 3) { \
257a30f8f8cSSatish Balay       t = (low + high) / 2; \
258a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
259a30f8f8cSSatish Balay       else low = t; \
260a30f8f8cSSatish Balay     } \
261a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
262a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
263a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
264a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
265a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
266a30f8f8cSSatish Balay         else *bap = value; \
267a30f8f8cSSatish Balay         goto b_noinsert; \
268a30f8f8cSSatish Balay       } \
269a30f8f8cSSatish Balay     } \
270a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2715f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273a30f8f8cSSatish Balay     N = nrow++ - 1; \
274a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2759566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2769566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2779566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
279a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
280e56f5c9eSBarry Smith     B->nonzerostate++; \
281a30f8f8cSSatish Balay   b_noinsert:; \
282a30f8f8cSSatish Balay     bilen[brow] = nrow; \
283a8f51744SPierre Jolivet   } while (0)
284a30f8f8cSSatish Balay 
285a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
286da81f932SPierre Jolivet    Any a(i,j) with i>j input by user is ignored or generates an error
287a30f8f8cSSatish Balay */
288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289d71ae5a4SJacob Faibussowitsch {
290a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291a30f8f8cSSatish Balay   MatScalar     value;
292ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2931302d50aSBarry Smith   PetscInt      i, j, row, col;
294d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
295d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297a30f8f8cSSatish Balay 
298a30f8f8cSSatish Balay   /* Some Variables required in the macro */
299a30f8f8cSSatish Balay   Mat           A     = baij->A;
300a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
3011302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
303a30f8f8cSSatish Balay 
304a30f8f8cSSatish Balay   Mat          B     = baij->B;
305a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
3061302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
308a30f8f8cSSatish Balay 
3091302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
3101302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311a30f8f8cSSatish Balay   MatScalar *ap, *bap;
312a30f8f8cSSatish Balay 
313a30f8f8cSSatish Balay   /* for stash */
3140298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
3150298fd71SBarry Smith   MatScalar *v_loc = NULL;
316a30f8f8cSSatish Balay 
317a30f8f8cSSatish Balay   PetscFunctionBegin;
318a30f8f8cSSatish Balay   if (!baij->donotstash) {
31959ffdab8SBarry Smith     if (n > baij->n_loc) {
3209566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
3219566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
3229566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
3239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
32426fbe8dcSKarl Rupp 
32559ffdab8SBarry Smith       baij->n_loc = n;
32659ffdab8SBarry Smith     }
32759ffdab8SBarry Smith     in_loc = baij->in_loc;
32859ffdab8SBarry Smith     v_loc  = baij->v_loc;
329a30f8f8cSSatish Balay   }
330a30f8f8cSSatish Balay 
331a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
332a30f8f8cSSatish Balay     if (im[i] < 0) continue;
3335f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
336a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
33701b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
33801b2bd88SHong Zhang           if (a->ignore_ltriangular) {
33901b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
34026fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
34101b2bd88SHong Zhang         }
342a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
3449371c9d4SSatish Balay           brow = row / bs;
3459371c9d4SSatish Balay           bcol = col / bs;
346a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
348db4deed7SKarl Rupp           else value = v[i + j * m];
349d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
3509566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
352f7d195e4SLawrence Mitchell           continue;
353f7d195e4SLawrence Mitchell         } else {
354f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
356a30f8f8cSSatish Balay           if (mat->was_assembled) {
35748a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
359eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
36071730473SSatish Balay             col = col - 1;
361a30f8f8cSSatish Balay #else
36271730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
363a30f8f8cSSatish Balay #endif
364a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3659566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366a30f8f8cSSatish Balay               col = in[j];
367a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368a30f8f8cSSatish Balay               B     = baij->B;
369a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3709371c9d4SSatish Balay               bimax = b->imax;
3719371c9d4SSatish Balay               bi    = b->i;
3729371c9d4SSatish Balay               bilen = b->ilen;
3739371c9d4SSatish Balay               bj    = b->j;
374a30f8f8cSSatish Balay               ba    = b->a;
37571730473SSatish Balay             } else col += in[j] % bs;
376a30f8f8cSSatish Balay           } else col = in[j];
377db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
378db4deed7SKarl Rupp           else value = v[i + j * m];
379d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3809566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381a30f8f8cSSatish Balay         }
382a30f8f8cSSatish Balay       }
383a30f8f8cSSatish Balay     } else { /* off processor entry */
3845f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385a30f8f8cSSatish Balay       if (!baij->donotstash) {
3865080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
387a30f8f8cSSatish Balay         n_loc          = 0;
388a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
389f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
391a30f8f8cSSatish Balay           if (roworiented) {
392a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
393a30f8f8cSSatish Balay           } else {
394a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
395a30f8f8cSSatish Balay           }
396a30f8f8cSSatish Balay           n_loc++;
397a30f8f8cSSatish Balay         }
3989566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399a30f8f8cSSatish Balay       }
400a30f8f8cSSatish Balay     }
401a30f8f8cSSatish Balay   }
4023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
403a30f8f8cSSatish Balay }
404a30f8f8cSSatish Balay 
405d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406d71ae5a4SJacob Faibussowitsch {
40736bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
40836bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
40936bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
41036bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
41136bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
41236bd2089SBarry Smith   const PetscScalar *value       = v;
41336bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
41436bd2089SBarry Smith 
41536bd2089SBarry Smith   PetscFunctionBegin;
41636bd2089SBarry Smith   if (col < row) {
4173ba16761SJacob Faibussowitsch     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
41836bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
41936bd2089SBarry Smith   }
42036bd2089SBarry Smith   rp    = aj + ai[row];
42136bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
42236bd2089SBarry Smith   rmax  = imax[row];
42336bd2089SBarry Smith   nrow  = ailen[row];
42436bd2089SBarry Smith   value = v;
42536bd2089SBarry Smith   low   = 0;
42636bd2089SBarry Smith   high  = nrow;
42736bd2089SBarry Smith 
42836bd2089SBarry Smith   while (high - low > 7) {
42936bd2089SBarry Smith     t = (low + high) / 2;
43036bd2089SBarry Smith     if (rp[t] > col) high = t;
43136bd2089SBarry Smith     else low = t;
43236bd2089SBarry Smith   }
43336bd2089SBarry Smith   for (i = low; i < high; i++) {
43436bd2089SBarry Smith     if (rp[i] > col) break;
43536bd2089SBarry Smith     if (rp[i] == col) {
43636bd2089SBarry Smith       bap = ap + bs2 * i;
43736bd2089SBarry Smith       if (roworiented) {
43836bd2089SBarry Smith         if (is == ADD_VALUES) {
43936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
440ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
44136bd2089SBarry Smith           }
44236bd2089SBarry Smith         } else {
44336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
444ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
44536bd2089SBarry Smith           }
44636bd2089SBarry Smith         }
44736bd2089SBarry Smith       } else {
44836bd2089SBarry Smith         if (is == ADD_VALUES) {
44936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
450ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
45136bd2089SBarry Smith           }
45236bd2089SBarry Smith         } else {
45336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
454ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
45536bd2089SBarry Smith           }
45636bd2089SBarry Smith         }
45736bd2089SBarry Smith       }
45836bd2089SBarry Smith       goto noinsert2;
45936bd2089SBarry Smith     }
46036bd2089SBarry Smith   }
46136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4625f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
46336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4649371c9d4SSatish Balay   N = nrow++ - 1;
4659371c9d4SSatish Balay   high++;
46636bd2089SBarry Smith   /* shift up all the later entries in this row */
4679566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4689566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
46936bd2089SBarry Smith   rp[i] = col;
47036bd2089SBarry Smith   bap   = ap + bs2 * i;
47136bd2089SBarry Smith   if (roworiented) {
47236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
473ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
47436bd2089SBarry Smith     }
47536bd2089SBarry Smith   } else {
47636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
477ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
47836bd2089SBarry Smith     }
47936bd2089SBarry Smith   }
48036bd2089SBarry Smith noinsert2:;
48136bd2089SBarry Smith   ailen[row] = nrow;
4823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
48336bd2089SBarry Smith }
48436bd2089SBarry Smith 
48536bd2089SBarry Smith /*
48636bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
48736bd2089SBarry Smith */
488d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489d71ae5a4SJacob Faibussowitsch {
49036bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
49136bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
49236bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
49336bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
49436bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
49536bd2089SBarry Smith   const PetscScalar *value       = v;
49636bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
49736bd2089SBarry Smith 
49836bd2089SBarry Smith   PetscFunctionBegin;
49936bd2089SBarry Smith   rp    = aj + ai[row];
50036bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
50136bd2089SBarry Smith   rmax  = imax[row];
50236bd2089SBarry Smith   nrow  = ailen[row];
50336bd2089SBarry Smith   low   = 0;
50436bd2089SBarry Smith   high  = nrow;
50536bd2089SBarry Smith   value = v;
50636bd2089SBarry Smith   while (high - low > 7) {
50736bd2089SBarry Smith     t = (low + high) / 2;
50836bd2089SBarry Smith     if (rp[t] > col) high = t;
50936bd2089SBarry Smith     else low = t;
51036bd2089SBarry Smith   }
51136bd2089SBarry Smith   for (i = low; i < high; i++) {
51236bd2089SBarry Smith     if (rp[i] > col) break;
51336bd2089SBarry Smith     if (rp[i] == col) {
51436bd2089SBarry Smith       bap = ap + bs2 * i;
51536bd2089SBarry Smith       if (roworiented) {
51636bd2089SBarry Smith         if (is == ADD_VALUES) {
51736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
518ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
51936bd2089SBarry Smith           }
52036bd2089SBarry Smith         } else {
52136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
522ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
52336bd2089SBarry Smith           }
52436bd2089SBarry Smith         }
52536bd2089SBarry Smith       } else {
52636bd2089SBarry Smith         if (is == ADD_VALUES) {
52736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
528ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
52936bd2089SBarry Smith             bap += bs;
53036bd2089SBarry Smith           }
53136bd2089SBarry Smith         } else {
53236bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
533ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
53436bd2089SBarry Smith             bap += bs;
53536bd2089SBarry Smith           }
53636bd2089SBarry Smith         }
53736bd2089SBarry Smith       }
53836bd2089SBarry Smith       goto noinsert2;
53936bd2089SBarry Smith     }
54036bd2089SBarry Smith   }
54136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
5425f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
54336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
5449371c9d4SSatish Balay   N = nrow++ - 1;
5459371c9d4SSatish Balay   high++;
54636bd2089SBarry Smith   /* shift up all the later entries in this row */
5479566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
5489566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
54936bd2089SBarry Smith   rp[i] = col;
55036bd2089SBarry Smith   bap   = ap + bs2 * i;
55136bd2089SBarry Smith   if (roworiented) {
55236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
553ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
55436bd2089SBarry Smith     }
55536bd2089SBarry Smith   } else {
55636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
557ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
55836bd2089SBarry Smith     }
55936bd2089SBarry Smith   }
56036bd2089SBarry Smith noinsert2:;
56136bd2089SBarry Smith   ailen[row] = nrow;
5623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56336bd2089SBarry Smith }
56436bd2089SBarry Smith 
56536bd2089SBarry Smith /*
56636bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
56736bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
56836bd2089SBarry Smith */
569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570d71ae5a4SJacob Faibussowitsch {
5710880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572f15d580aSBarry Smith   const MatScalar *value;
573f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
574ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5780880e062SHong Zhang 
579a30f8f8cSSatish Balay   PetscFunctionBegin;
5800880e062SHong Zhang   if (!barray) {
5819566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5820880e062SHong Zhang     baij->barray = barray;
5830880e062SHong Zhang   }
5840880e062SHong Zhang 
5850880e062SHong Zhang   if (roworiented) {
5860880e062SHong Zhang     stepval = (n - 1) * bs;
5870880e062SHong Zhang   } else {
5880880e062SHong Zhang     stepval = (m - 1) * bs;
5890880e062SHong Zhang   }
5900880e062SHong Zhang   for (i = 0; i < m; i++) {
5910880e062SHong Zhang     if (im[i] < 0) continue;
5926bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5930880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5940880e062SHong Zhang       row = im[i] - rstart;
5950880e062SHong Zhang       for (j = 0; j < n; j++) {
596f3f98c53SJed Brown         if (im[i] > in[j]) {
597f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599f3f98c53SJed Brown         }
6000880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
6010880e062SHong Zhang         if ((roworiented) && (n == 1)) {
602f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
6030880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
604f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
6050880e062SHong Zhang         } else { /* Here a copy is required */
6060880e062SHong Zhang           if (roworiented) {
6070880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
6080880e062SHong Zhang           } else {
6090880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
6100880e062SHong Zhang           }
6110880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
612ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
6130880e062SHong Zhang           }
6140880e062SHong Zhang           barray -= bs2;
6150880e062SHong Zhang         }
6160880e062SHong Zhang 
6170880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
6180880e062SHong Zhang           col = in[j] - cstart;
6199566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
621f7d195e4SLawrence Mitchell           continue;
622f7d195e4SLawrence Mitchell         } else {
623f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
6240880e062SHong Zhang           if (mat->was_assembled) {
62548a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
6260880e062SHong Zhang 
6272515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
6280880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
6299371c9d4SSatish Balay             {
6309371c9d4SSatish Balay               PetscInt data;
631eec179cfSJacob Faibussowitsch               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
63208401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6330880e062SHong Zhang             }
6340880e062SHong Zhang   #else
63508401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6360880e062SHong Zhang   #endif
6370880e062SHong Zhang #endif
6380880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
639eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
6400880e062SHong Zhang             col = (col - 1) / bs;
6410880e062SHong Zhang #else
6420880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
6430880e062SHong Zhang #endif
6440880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
6459566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
6460880e062SHong Zhang               col = in[j];
6470880e062SHong Zhang             }
64826fbe8dcSKarl Rupp           } else col = in[j];
6499566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
6500880e062SHong Zhang         }
6510880e062SHong Zhang       }
6520880e062SHong Zhang     } else {
6535f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
6540880e062SHong Zhang       if (!baij->donotstash) {
6550880e062SHong Zhang         if (roworiented) {
6569566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6570880e062SHong Zhang         } else {
6589566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6590880e062SHong Zhang         }
6600880e062SHong Zhang       }
6610880e062SHong Zhang     }
6620880e062SHong Zhang   }
6633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
664a30f8f8cSSatish Balay }
665a30f8f8cSSatish Balay 
666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667d71ae5a4SJacob Faibussowitsch {
668f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671a30f8f8cSSatish Balay 
672a30f8f8cSSatish Balay   PetscFunctionBegin;
673a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
67454c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
67554c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
678a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
67954c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
68054c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6839566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684a30f8f8cSSatish Balay         } else {
68548a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
687eec179cfSJacob Faibussowitsch           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688a30f8f8cSSatish Balay           data--;
689a30f8f8cSSatish Balay #else
690a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
691a30f8f8cSSatish Balay #endif
692a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693a30f8f8cSSatish Balay           else {
694a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6959566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696a30f8f8cSSatish Balay           }
697a30f8f8cSSatish Balay         }
698a30f8f8cSSatish Balay       }
699f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700a30f8f8cSSatish Balay   }
7013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
702a30f8f8cSSatish Balay }
703a30f8f8cSSatish Balay 
704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705d71ae5a4SJacob Faibussowitsch {
706a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
708a30f8f8cSSatish Balay 
709a30f8f8cSSatish Balay   PetscFunctionBegin;
710a30f8f8cSSatish Balay   if (baij->size == 1) {
7119566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
712a30f8f8cSSatish Balay   } else {
713a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
7149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
7159566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
7169371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7179371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
7189566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
7199371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7209371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
7211c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7228f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
7239566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
7240b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
7250b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
7260b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
7270b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
728899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
7300b8dc8d2SHong Zhang       MatScalar    *v;
7310b8dc8d2SHong Zhang 
7329566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
7339566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
7340b8dc8d2SHong Zhang       /* Amat */
7359371c9d4SSatish Balay       v  = amat->a;
7369371c9d4SSatish Balay       jj = amat->j;
7370b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7380b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7390b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
7400b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7419371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
7429371c9d4SSatish Balay           jj++;
7430b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7440b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7459371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7469371c9d4SSatish Balay               v++;
7470b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7480b8dc8d2SHong Zhang               /* non-diagonal block */
7490b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
7500b8dc8d2SHong Zhang             }
7510b8dc8d2SHong Zhang           }
7520b8dc8d2SHong Zhang         }
7539566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7540b8dc8d2SHong Zhang       }
7550b8dc8d2SHong Zhang       /* Bmat */
7569371c9d4SSatish Balay       v  = bmat->a;
7579371c9d4SSatish Balay       jj = bmat->j;
7580b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7590b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7600b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
7610b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7629371c9d4SSatish Balay           gcol = bs * garray[*jj];
7639371c9d4SSatish Balay           jj++;
7640b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7650b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7669371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7679371c9d4SSatish Balay               v++;
7680b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7690b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7700b8dc8d2SHong Zhang             }
7710b8dc8d2SHong Zhang           }
7720b8dc8d2SHong Zhang         }
7739566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7740b8dc8d2SHong Zhang       }
7751c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7760b8dc8d2SHong Zhang       *norm = 0.0;
777d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7780b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7790b8dc8d2SHong Zhang       }
7809566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
781f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782a30f8f8cSSatish Balay   }
7833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
784a30f8f8cSSatish Balay }
785a30f8f8cSSatish Balay 
786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787d71ae5a4SJacob Faibussowitsch {
788a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7891302d50aSBarry Smith   PetscInt      nstash, reallocs;
790a30f8f8cSSatish Balay 
791a30f8f8cSSatish Balay   PetscFunctionBegin;
7923ba16761SJacob Faibussowitsch   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793a30f8f8cSSatish Balay 
7949566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7959566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7969566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7979566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7989566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7999566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
8003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
801a30f8f8cSSatish Balay }
802a30f8f8cSSatish Balay 
803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804d71ae5a4SJacob Faibussowitsch {
805a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
80713f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808e44c0bd4SBarry Smith   PetscInt     *row, *col;
809ace3abfcSBarry Smith   PetscBool     other_disassembled;
81013f74950SBarry Smith   PetscMPIInt   n;
811ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
812a30f8f8cSSatish Balay   MatScalar    *val;
813a30f8f8cSSatish Balay 
81491c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815a30f8f8cSSatish Balay   PetscFunctionBegin;
8164cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
817a30f8f8cSSatish Balay     while (1) {
8189566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819a30f8f8cSSatish Balay       if (!flg) break;
820a30f8f8cSSatish Balay 
821a30f8f8cSSatish Balay       for (i = 0; i < n;) {
822a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
82326fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
82426fbe8dcSKarl Rupp           if (row[j] != rstart) break;
82526fbe8dcSKarl Rupp         }
826a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
827a30f8f8cSSatish Balay         else ncols = n - i;
828a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
8299566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830a30f8f8cSSatish Balay         i = j;
831a30f8f8cSSatish Balay       }
832a30f8f8cSSatish Balay     }
8339566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
835a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
836a30f8f8cSSatish Balay        restore the original flags */
837a30f8f8cSSatish Balay     r1 = baij->roworiented;
838a30f8f8cSSatish Balay     r2 = a->roworiented;
83991c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
84026fbe8dcSKarl Rupp 
841a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
842a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
84326fbe8dcSKarl Rupp 
84491c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845a30f8f8cSSatish Balay     while (1) {
8469566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847a30f8f8cSSatish Balay       if (!flg) break;
848a30f8f8cSSatish Balay 
849a30f8f8cSSatish Balay       for (i = 0; i < n;) {
850a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
85126fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
85226fbe8dcSKarl Rupp           if (row[j] != rstart) break;
85326fbe8dcSKarl Rupp         }
854a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
855a30f8f8cSSatish Balay         else ncols = n - i;
8569566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857a30f8f8cSSatish Balay         i = j;
858a30f8f8cSSatish Balay       }
859a30f8f8cSSatish Balay     }
8609566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
86126fbe8dcSKarl Rupp 
862a30f8f8cSSatish Balay     baij->roworiented = r1;
863a30f8f8cSSatish Balay     a->roworiented    = r2;
86426fbe8dcSKarl Rupp 
86591c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
8689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
870a30f8f8cSSatish Balay 
871a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8726aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
873a30f8f8cSSatish Balay   /*
874a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
875a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
876a30f8f8cSSatish Balay   */
877a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8785f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
87948a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880a30f8f8cSSatish Balay   }
881a30f8f8cSSatish Balay 
8829371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8849566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
885a30f8f8cSSatish Balay 
8869566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
88726fbe8dcSKarl Rupp 
888f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8894f9cfa9eSBarry Smith 
8904f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8914f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8931c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894e56f5c9eSBarry Smith   }
8953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
896a30f8f8cSSatish Balay }
897a30f8f8cSSatish Balay 
898dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8999804daf3SBarry Smith #include <petscdraw.h>
900d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901d71ae5a4SJacob Faibussowitsch {
902a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
9047da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
905ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
906b0a32e0cSBarry Smith   PetscViewer       sviewer;
907f3ef73ceSBarry Smith   PetscViewerFormat format;
908a30f8f8cSSatish Balay 
909a30f8f8cSSatish Balay   PetscFunctionBegin;
9109566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9119566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
91232077d6dSBarry Smith   if (iascii) {
9139566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
914456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915a30f8f8cSSatish Balay       MatInfo info;
9169566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
9179566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
9189566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9199371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
9209371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
9219566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
9229566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9239566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
9249566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9259566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9269566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
9279566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
9289566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
9293ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
930fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
9319566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
9323ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
933c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
9343ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
935a30f8f8cSSatish Balay     }
936a30f8f8cSSatish Balay   }
937a30f8f8cSSatish Balay 
938a30f8f8cSSatish Balay   if (isdraw) {
939b0a32e0cSBarry Smith     PetscDraw draw;
940ace3abfcSBarry Smith     PetscBool isnull;
9419566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
9429566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
9433ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944a30f8f8cSSatish Balay   }
945a30f8f8cSSatish Balay 
9467da1fb6eSBarry Smith   {
947a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
948a30f8f8cSSatish Balay     Mat           A;
94965d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
95065d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
951d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952a30f8f8cSSatish Balay     MatScalar    *a;
9533e219373SBarry Smith     const char   *matname;
954a30f8f8cSSatish Balay 
955f204ca49SKris Buschelman     /* Should this be the same type as mat? */
9569566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957dd400576SPatrick Sanan     if (rank == 0) {
9589566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
959a30f8f8cSSatish Balay     } else {
9609566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
961a30f8f8cSSatish Balay     }
9629566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9639566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9649566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965a30f8f8cSSatish Balay 
966a30f8f8cSSatish Balay     /* copy over the A part */
96765d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9689371c9d4SSatish Balay     ai   = Aloc->i;
9699371c9d4SSatish Balay     aj   = Aloc->j;
9709371c9d4SSatish Balay     a    = Aloc->a;
9719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
972a30f8f8cSSatish Balay 
973a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
974e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
97526fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
977e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
978a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9799566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
98026fbe8dcSKarl Rupp           col++;
98126fbe8dcSKarl Rupp           a += bs;
982a30f8f8cSSatish Balay         }
983a30f8f8cSSatish Balay       }
984a30f8f8cSSatish Balay     }
985a30f8f8cSSatish Balay     /* copy over the B part */
98665d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9879371c9d4SSatish Balay     ai   = Bloc->i;
9889371c9d4SSatish Balay     aj   = Bloc->j;
9899371c9d4SSatish Balay     a    = Bloc->a;
990a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
991e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
99226fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
994a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
995a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9969566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
99726fbe8dcSKarl Rupp           col++;
99826fbe8dcSKarl Rupp           a += bs;
999a30f8f8cSSatish Balay         }
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
10029566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
10039566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
10049566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005a30f8f8cSSatish Balay     /*
1006a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1007b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
1008a30f8f8cSSatish Balay     */
10099566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
101023a3927dSBarry Smith     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011dd400576SPatrick Sanan     if (rank == 0) {
101223a3927dSBarry Smith       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
10139566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014a30f8f8cSSatish Balay     }
10159566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
10169566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
10179566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
1018a30f8f8cSSatish Balay   }
10193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1020a30f8f8cSSatish Balay }
1021a30f8f8cSSatish Balay 
1022618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024d1654148SHong Zhang 
1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026d71ae5a4SJacob Faibussowitsch {
1027ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
1028a30f8f8cSSatish Balay 
1029a30f8f8cSSatish Balay   PetscFunctionBegin;
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
10319566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
10329566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
10339566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034d1654148SHong Zhang   if (iascii || isdraw || issocket) {
10359566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
10361baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
10373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1038a30f8f8cSSatish Balay }
1039a30f8f8cSSatish Balay 
1040d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041d71ae5a4SJacob Faibussowitsch {
1042547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10446de40e93SBarry Smith   PetscScalar       *from;
10456de40e93SBarry Smith   const PetscScalar *x;
1046547795f9SHong Zhang 
1047547795f9SHong Zhang   PetscFunctionBegin;
1048547795f9SHong Zhang   /* diagonal part */
10499566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050629a200eSBarry Smith   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1051629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1052629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1053547795f9SHong Zhang 
1054547795f9SHong Zhang   /* subdiagonal part */
10555f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10569566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1057547795f9SHong Zhang 
1058547795f9SHong Zhang   /* copy x into the vec slvec0 */
10599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1061547795f9SHong Zhang 
10629566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1065547795f9SHong Zhang 
10669566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10679566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1068547795f9SHong Zhang   /* supperdiagonal part */
10699566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1071547795f9SHong Zhang }
1072547795f9SHong Zhang 
1073d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1074d71ae5a4SJacob Faibussowitsch {
1075a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1076eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1077d9ca1df4SBarry Smith   PetscScalar       *from;
1078d9ca1df4SBarry Smith   const PetscScalar *x;
1079a9d4b620SHong Zhang 
1080a9d4b620SHong Zhang   PetscFunctionBegin;
1081a9d4b620SHong Zhang   /* diagonal part */
10829566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1083629a200eSBarry Smith   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1084629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1085629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1086a9d4b620SHong Zhang 
1087a9d4b620SHong Zhang   /* subdiagonal part */
10889566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1089fc165ae2SBarry Smith 
1090a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1093a9d4b620SHong Zhang 
10949566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1097fc165ae2SBarry Smith 
10989566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10999566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1100a9d4b620SHong Zhang   /* supperdiagonal part */
11019566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
11023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1103a9d4b620SHong Zhang }
1104a9d4b620SHong Zhang 
1105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1106d71ae5a4SJacob Faibussowitsch {
1107eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1108eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1109629a200eSBarry Smith   PetscScalar       *from;
1110eb1ec7c1SStefano Zampini   const PetscScalar *x;
1111eb1ec7c1SStefano Zampini 
1112eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1113eb1ec7c1SStefano Zampini   /* diagonal part */
11149566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1115629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1116629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1117eb1ec7c1SStefano Zampini 
1118eb1ec7c1SStefano Zampini   /* subdiagonal part */
11195f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11209566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1121eb1ec7c1SStefano Zampini 
1122eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1127eb1ec7c1SStefano Zampini 
11289566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11309566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1131eb1ec7c1SStefano Zampini 
1132eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11339566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1135eb1ec7c1SStefano Zampini }
1136eb1ec7c1SStefano Zampini 
1137d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138d71ae5a4SJacob Faibussowitsch {
1139de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1140d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1141629a200eSBarry Smith   PetscScalar       *from;
1142d9ca1df4SBarry Smith   const PetscScalar *x;
1143a9d4b620SHong Zhang 
1144a9d4b620SHong Zhang   PetscFunctionBegin;
1145a9d4b620SHong Zhang   /* diagonal part */
11469566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1147629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1148629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1149a9d4b620SHong Zhang 
1150a9d4b620SHong Zhang   /* subdiagonal part */
11519566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1152a9d4b620SHong Zhang 
1153a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11559566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11569566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1158a9d4b620SHong Zhang 
11599566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11619566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1162a9d4b620SHong Zhang 
1163a9d4b620SHong Zhang   /* supperdiagonal part */
11649566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1166a9d4b620SHong Zhang }
1167a9d4b620SHong Zhang 
1168a30f8f8cSSatish Balay /*
1169a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1170a30f8f8cSSatish Balay    diagonal block
1171a30f8f8cSSatish Balay */
1172d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1173d71ae5a4SJacob Faibussowitsch {
1174a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175a30f8f8cSSatish Balay 
1176a30f8f8cSSatish Balay   PetscFunctionBegin;
117708401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11789566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
11793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1180a30f8f8cSSatish Balay }
1181a30f8f8cSSatish Balay 
1182d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1183d71ae5a4SJacob Faibussowitsch {
1184a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1185a30f8f8cSSatish Balay 
1186a30f8f8cSSatish Balay   PetscFunctionBegin;
11879566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11889566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
11893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1190a30f8f8cSSatish Balay }
1191a30f8f8cSSatish Balay 
1192d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1193d71ae5a4SJacob Faibussowitsch {
1194d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1195d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1196d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1197d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1198899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1199d0d4cfc2SHong Zhang 
1200a30f8f8cSSatish Balay   PetscFunctionBegin;
12015f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1202d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1203d0d4cfc2SHong Zhang 
1204d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1205d0d4cfc2SHong Zhang     /*
1206d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1207d0d4cfc2SHong Zhang     */
1208d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1209d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1210d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1211d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1212d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
121326fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1214d0d4cfc2SHong Zhang     }
12159566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1216d0d4cfc2SHong Zhang   }
1217d0d4cfc2SHong Zhang 
12185f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1219d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1220d0d4cfc2SHong Zhang 
12219371c9d4SSatish Balay   pvA = &vworkA;
12229371c9d4SSatish Balay   pcA = &cworkA;
12239371c9d4SSatish Balay   pvB = &vworkB;
12249371c9d4SSatish Balay   pcB = &cworkB;
12259371c9d4SSatish Balay   if (!v) {
12269371c9d4SSatish Balay     pvA = NULL;
12279371c9d4SSatish Balay     pvB = NULL;
12289371c9d4SSatish Balay   }
12299371c9d4SSatish Balay   if (!idx) {
12309371c9d4SSatish Balay     pcA = NULL;
12319371c9d4SSatish Balay     if (!v) pcB = NULL;
12329371c9d4SSatish Balay   }
12339566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12349566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1235d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1236d0d4cfc2SHong Zhang 
1237d0d4cfc2SHong Zhang   cmap = mat->garray;
1238d0d4cfc2SHong Zhang   if (v || idx) {
1239d0d4cfc2SHong Zhang     if (nztot) {
1240d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1241d0d4cfc2SHong Zhang       PetscInt imark = -1;
1242d0d4cfc2SHong Zhang       if (v) {
1243d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1244d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1245d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1246d0d4cfc2SHong Zhang           else break;
1247d0d4cfc2SHong Zhang         }
1248d0d4cfc2SHong Zhang         imark = i;
1249d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1250d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1251d0d4cfc2SHong Zhang       }
1252d0d4cfc2SHong Zhang       if (idx) {
1253d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1254d0d4cfc2SHong Zhang         if (imark > -1) {
1255ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1256d0d4cfc2SHong Zhang         } else {
1257d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
125826fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259d0d4cfc2SHong Zhang             else break;
1260d0d4cfc2SHong Zhang           }
1261d0d4cfc2SHong Zhang           imark = i;
1262d0d4cfc2SHong Zhang         }
1263d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1264d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1265d0d4cfc2SHong Zhang       }
1266d0d4cfc2SHong Zhang     } else {
1267f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1268f4259b30SLisandro Dalcin       if (v) *v = NULL;
1269d0d4cfc2SHong Zhang     }
1270d0d4cfc2SHong Zhang   }
1271d0d4cfc2SHong Zhang   *nz = nztot;
12729566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12739566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
12743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1275a30f8f8cSSatish Balay }
1276a30f8f8cSSatish Balay 
1277d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1278d71ae5a4SJacob Faibussowitsch {
1279a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1280a30f8f8cSSatish Balay 
1281a30f8f8cSSatish Balay   PetscFunctionBegin;
12825f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1283a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
12843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1285a30f8f8cSSatish Balay }
1286a30f8f8cSSatish Balay 
1287d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1288d71ae5a4SJacob Faibussowitsch {
1289d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1290d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1291d0d4cfc2SHong Zhang 
1292d0d4cfc2SHong Zhang   PetscFunctionBegin;
1293d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
12943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1295d0d4cfc2SHong Zhang }
1296d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1297d71ae5a4SJacob Faibussowitsch {
1298d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1299d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1300d0d4cfc2SHong Zhang 
1301d0d4cfc2SHong Zhang   PetscFunctionBegin;
1302d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
13033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1304d0d4cfc2SHong Zhang }
1305d0d4cfc2SHong Zhang 
1306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1307d71ae5a4SJacob Faibussowitsch {
13085f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
13095f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
13102726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
13112726fb6dSPierre Jolivet 
13129566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13139566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13145f80ce2aSJacob Faibussowitsch   }
13153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13162726fb6dSPierre Jolivet }
13172726fb6dSPierre Jolivet 
1318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1319d71ae5a4SJacob Faibussowitsch {
132099cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
132199cafbc1SBarry Smith 
132299cafbc1SBarry Smith   PetscFunctionBegin;
13239566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13249566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
13253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132699cafbc1SBarry Smith }
132799cafbc1SBarry Smith 
1328d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1329d71ae5a4SJacob Faibussowitsch {
133099cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
133199cafbc1SBarry Smith 
133299cafbc1SBarry Smith   PetscFunctionBegin;
13339566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13349566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
13353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
133699cafbc1SBarry Smith }
133799cafbc1SBarry Smith 
13387dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
133936032a97SHong Zhang    Input: isrow       - distributed(parallel),
134036032a97SHong Zhang           iscol_local - locally owned (seq)
134136032a97SHong Zhang */
1342d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1343d71ae5a4SJacob Faibussowitsch {
13448f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13458f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
134636032a97SHong Zhang 
134736032a97SHong Zhang   PetscFunctionBegin;
13489566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13499566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13501098a8e8SHong Zhang   if (sz1 > sz2) {
13511098a8e8SHong Zhang     *flg = PETSC_FALSE;
13523ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
13531098a8e8SHong Zhang   }
13548f46ffcaSHong Zhang 
13559566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13569566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13578f46ffcaSHong Zhang 
13589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13609566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13619566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13629566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13639566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13648f46ffcaSHong Zhang 
13658f46ffcaSHong Zhang   nmatch = 0;
13668f46ffcaSHong Zhang   k      = 0;
13678f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13688f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13698f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13709371c9d4SSatish Balay         k = j;
13719371c9d4SSatish Balay         nmatch++;
13728f46ffcaSHong Zhang         break;
13738f46ffcaSHong Zhang       }
13748f46ffcaSHong Zhang     }
13758f46ffcaSHong Zhang   }
13769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13779566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13789566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13799566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13801098a8e8SHong Zhang   if (nmatch < sz1) {
13811098a8e8SHong Zhang     *flg = PETSC_FALSE;
13821098a8e8SHong Zhang   } else {
13831098a8e8SHong Zhang     *flg = PETSC_TRUE;
13841098a8e8SHong Zhang   }
13853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13868f46ffcaSHong Zhang }
138736032a97SHong Zhang 
1388d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1389d71ae5a4SJacob Faibussowitsch {
139036032a97SHong Zhang   IS        iscol_local;
139136032a97SHong Zhang   PetscInt  csize;
139236032a97SHong Zhang   PetscBool isequal;
139336032a97SHong Zhang 
139436032a97SHong Zhang   PetscFunctionBegin;
13959566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
139636032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13979566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13985f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
139936032a97SHong Zhang   } else {
1400068661f9SToby Isaac     PetscBool issorted;
1401068661f9SToby Isaac 
14029566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
14039566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
14049566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
14055f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
14068f46ffcaSHong Zhang   }
14078f46ffcaSHong Zhang 
14087dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
14099566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
14108f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
14119566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
14129566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14138f46ffcaSHong Zhang   }
14143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14158f46ffcaSHong Zhang }
14168f46ffcaSHong Zhang 
1417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1418d71ae5a4SJacob Faibussowitsch {
1419a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1420a30f8f8cSSatish Balay 
1421a30f8f8cSSatish Balay   PetscFunctionBegin;
14229566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14239566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
14243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1425a30f8f8cSSatish Balay }
1426a30f8f8cSSatish Balay 
1427d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1428d71ae5a4SJacob Faibussowitsch {
1429a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1430a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14313966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1432a30f8f8cSSatish Balay 
1433a30f8f8cSSatish Balay   PetscFunctionBegin;
1434d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
143526fbe8dcSKarl Rupp 
14369566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
143726fbe8dcSKarl Rupp 
14389371c9d4SSatish Balay   isend[0] = info->nz_used;
14399371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14409371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14419371c9d4SSatish Balay   isend[3] = info->memory;
14429371c9d4SSatish Balay   isend[4] = info->mallocs;
144326fbe8dcSKarl Rupp 
14449566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
144526fbe8dcSKarl Rupp 
14469371c9d4SSatish Balay   isend[0] += info->nz_used;
14479371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14489371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14499371c9d4SSatish Balay   isend[3] += info->memory;
14509371c9d4SSatish Balay   isend[4] += info->mallocs;
1451a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1452a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1453a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1454a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1455a30f8f8cSSatish Balay     info->memory       = isend[3];
1456a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1457a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14581c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
145926fbe8dcSKarl Rupp 
1460a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1461a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1462a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1463a30f8f8cSSatish Balay     info->memory       = irecv[3];
1464a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1465a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14661c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
146726fbe8dcSKarl Rupp 
1468a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1469a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1470a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1471a30f8f8cSSatish Balay     info->memory       = irecv[3];
1472a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
147398921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1474a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1475a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1476a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
14773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1478a30f8f8cSSatish Balay }
1479a30f8f8cSSatish Balay 
1480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1481d71ae5a4SJacob Faibussowitsch {
1482a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1483d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1484a30f8f8cSSatish Balay 
1485a30f8f8cSSatish Balay   PetscFunctionBegin;
1486e98b92d7SKris Buschelman   switch (op) {
1487512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1488e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148928b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1490a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1491c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1492e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
149343674050SBarry Smith     MatCheckPreallocated(A, 1);
14949566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14959566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1496e98b92d7SKris Buschelman     break;
1497e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
149843674050SBarry Smith     MatCheckPreallocated(A, 1);
14994e0d8c25SBarry Smith     a->roworiented = flg;
150026fbe8dcSKarl Rupp 
15019566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15029566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1503e98b92d7SKris Buschelman     break;
15048c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1505d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1506d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1507d71ae5a4SJacob Faibussowitsch     break;
1508d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1510d71ae5a4SJacob Faibussowitsch     break;
1511d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1512d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1513d71ae5a4SJacob Faibussowitsch     break;
1514d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1515d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1516d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15170f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1518eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1519547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1520eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1521eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1522eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1523b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1524eb1ec7c1SStefano Zampini     }
15250f2140c7SStefano Zampini #endif
1526eeffb40dSHong Zhang     break;
1527ffa07934SHong Zhang   case MAT_SPD:
1528d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1529d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1530d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1531eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1532eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1533eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1534eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1535eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1536eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1537eb1ec7c1SStefano Zampini     }
1538eb1ec7c1SStefano Zampini #endif
1539eeffb40dSHong Zhang     break;
154077e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
154143674050SBarry Smith     MatCheckPreallocated(A, 1);
15429566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1543eeffb40dSHong Zhang     break;
15449a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1545b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15465f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15479566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
154877e54ba9SKris Buschelman     break;
1549d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1550d71ae5a4SJacob Faibussowitsch     break;
1551d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1552d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1553d71ae5a4SJacob Faibussowitsch     break;
1554d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1555d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1556d71ae5a4SJacob Faibussowitsch     break;
1557d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1558d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1559d71ae5a4SJacob Faibussowitsch     break;
1560d71ae5a4SJacob Faibussowitsch   default:
1561d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1562a30f8f8cSSatish Balay   }
15633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1564a30f8f8cSSatish Balay }
1565a30f8f8cSSatish Balay 
1566d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1567d71ae5a4SJacob Faibussowitsch {
1568a30f8f8cSSatish Balay   PetscFunctionBegin;
15697fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1570cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15719566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1572cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15739566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1574fc4dec0aSBarry Smith   }
15753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1576a30f8f8cSSatish Balay }
1577a30f8f8cSSatish Balay 
1578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1579d71ae5a4SJacob Faibussowitsch {
1580a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1581a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15825e90f9d9SHong Zhang   PetscInt      nv, m, n;
1583ace3abfcSBarry Smith   PetscBool     flg;
1584a30f8f8cSSatish Balay 
1585a30f8f8cSSatish Balay   PetscFunctionBegin;
1586a30f8f8cSSatish Balay   if (ll != rr) {
15879566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15885f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1589a30f8f8cSSatish Balay   }
15903ba16761SJacob Faibussowitsch   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1591b3bf805bSHong Zhang 
15929566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15935f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1594b3bf805bSHong Zhang 
15959566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15965f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15975e90f9d9SHong Zhang 
15989566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15995e90f9d9SHong Zhang 
16005e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1601dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
16025e90f9d9SHong Zhang 
16035e90f9d9SHong Zhang   /* scale the diagonal part */
1604dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1605a30f8f8cSSatish Balay 
16065e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
16079566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1608dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
16093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1610a30f8f8cSSatish Balay }
1611a30f8f8cSSatish Balay 
1612d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613d71ae5a4SJacob Faibussowitsch {
1614f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1615a30f8f8cSSatish Balay 
1616a30f8f8cSSatish Balay   PetscFunctionBegin;
16179566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
16183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1619a30f8f8cSSatish Balay }
1620a30f8f8cSSatish Balay 
16216849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1622a30f8f8cSSatish Balay 
1623d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1624d71ae5a4SJacob Faibussowitsch {
1625a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1626a30f8f8cSSatish Balay   Mat           a, b, c, d;
1627ace3abfcSBarry Smith   PetscBool     flg;
1628a30f8f8cSSatish Balay 
1629a30f8f8cSSatish Balay   PetscFunctionBegin;
16309371c9d4SSatish Balay   a = matA->A;
16319371c9d4SSatish Balay   b = matA->B;
16329371c9d4SSatish Balay   c = matB->A;
16339371c9d4SSatish Balay   d = matB->B;
1634a30f8f8cSSatish Balay 
16359566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
163648a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16371c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
16383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1639a30f8f8cSSatish Balay }
1640a30f8f8cSSatish Balay 
1641d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1642d71ae5a4SJacob Faibussowitsch {
16434c7a3774SStefano Zampini   PetscBool isbaij;
16443c896bc6SHong Zhang 
16453c896bc6SHong Zhang   PetscFunctionBegin;
16469566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16475f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16483c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16493c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16509566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16519566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16529566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16533c896bc6SHong Zhang   } else {
16544c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16554c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16564c7a3774SStefano Zampini 
16579566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16589566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16593c896bc6SHong Zhang   }
16609566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16623c896bc6SHong Zhang }
16633c896bc6SHong Zhang 
1664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1665d71ae5a4SJacob Faibussowitsch {
16664fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16674fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16684fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16694fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16704fe895cdSHong Zhang 
16714fe895cdSHong Zhang   PetscFunctionBegin;
16724fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16734fe895cdSHong Zhang     PetscScalar alpha = a;
16744fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16754fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16769566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1677792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16784fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16794fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16809566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1681792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16829566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1683ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16849566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16859566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16869566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16874fe895cdSHong Zhang   } else {
16884de5dceeSHong Zhang     Mat       B;
16894de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16905f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16919566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16929566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16939566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16959566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16969566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16979566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16989566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16999566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
17009566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
17019566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
17029566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
17039566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
17049566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
17059566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17069566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17089566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17094fe895cdSHong Zhang   }
17103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17114fe895cdSHong Zhang }
17124fe895cdSHong Zhang 
1713d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1714d71ae5a4SJacob Faibussowitsch {
17151302d50aSBarry Smith   PetscInt  i;
1716afebec48SHong Zhang   PetscBool flg;
1717a5e6ed63SBarry Smith 
17186849ba73SBarry Smith   PetscFunctionBegin;
17199566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1720a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17219566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
172248a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17234dcd73b1SHong Zhang   }
17243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1725a5e6ed63SBarry Smith }
1726a5e6ed63SBarry Smith 
1727d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1728d71ae5a4SJacob Faibussowitsch {
17297d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17306f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17317d68702bSBarry Smith 
17327d68702bSBarry Smith   PetscFunctionBegin;
17336f33a894SBarry Smith   if (!Y->preallocated) {
17349566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17356f33a894SBarry Smith   } else if (!aij->nz) {
1736b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17379566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1738b83222d8SBarry Smith     aij->nonew = nonew;
17397d68702bSBarry Smith   }
17409566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17427d68702bSBarry Smith }
17437d68702bSBarry Smith 
1744d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1745d71ae5a4SJacob Faibussowitsch {
17463b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17473b49f96aSBarry Smith 
17483b49f96aSBarry Smith   PetscFunctionBegin;
17495f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17509566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17513b49f96aSBarry Smith   if (d) {
17523b49f96aSBarry Smith     PetscInt rstart;
17539566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17543b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17553b49f96aSBarry Smith   }
17563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17573b49f96aSBarry Smith }
17583b49f96aSBarry Smith 
1759d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1760d71ae5a4SJacob Faibussowitsch {
1761a5b7ff6bSBarry Smith   PetscFunctionBegin;
1762a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
17633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1764a5b7ff6bSBarry Smith }
17653b49f96aSBarry Smith 
1766*17ea310bSPierre Jolivet static PetscErrorCode MatEliminateZeros_MPISBAIJ(Mat A, PetscBool keep)
1767*17ea310bSPierre Jolivet {
1768*17ea310bSPierre Jolivet   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1769*17ea310bSPierre Jolivet 
1770*17ea310bSPierre Jolivet   PetscFunctionBegin;
1771*17ea310bSPierre Jolivet   PetscCall(MatEliminateZeros_SeqSBAIJ(a->A, keep));       // possibly keep zero diagonal coefficients
1772*17ea310bSPierre Jolivet   PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
1773*17ea310bSPierre Jolivet   PetscFunctionReturn(PETSC_SUCCESS);
1774*17ea310bSPierre Jolivet }
1775*17ea310bSPierre Jolivet 
17763964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1777a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1778a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1779a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
178097304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1781431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1782431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1783f4259b30SLisandro Dalcin                                        NULL,
1784f4259b30SLisandro Dalcin                                        NULL,
1785f4259b30SLisandro Dalcin                                        NULL,
1786f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1787f4259b30SLisandro Dalcin                                        NULL,
1788f4259b30SLisandro Dalcin                                        NULL,
178941f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1790a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
179197304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1792a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1793a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1794a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1795a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
179697304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1797a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1798a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1799a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1800f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1801f4259b30SLisandro Dalcin                                        NULL,
1802f4259b30SLisandro Dalcin                                        NULL,
1803f4259b30SLisandro Dalcin                                        NULL,
1804f4259b30SLisandro Dalcin                                        NULL,
180526cec326SBarry Smith                                        /* 29*/ MatSetUp_MPI_Hash,
1806f4259b30SLisandro Dalcin                                        NULL,
1807f4259b30SLisandro Dalcin                                        NULL,
1808a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1809f4259b30SLisandro Dalcin                                        NULL,
1810d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1811f4259b30SLisandro Dalcin                                        NULL,
1812f4259b30SLisandro Dalcin                                        NULL,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        NULL,
1815d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18167dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1817d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1818a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18193c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1820f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1821a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18227d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1823f4259b30SLisandro Dalcin                                        NULL,
1824f4259b30SLisandro Dalcin                                        NULL,
1825f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1826f4259b30SLisandro Dalcin                                        NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829f4259b30SLisandro Dalcin                                        NULL,
1830f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1831f4259b30SLisandro Dalcin                                        NULL,
1832a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1833f4259b30SLisandro Dalcin                                        NULL,
1834a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18357dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1836f4259b30SLisandro Dalcin                                        NULL,
1837f4259b30SLisandro Dalcin                                        NULL,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        NULL,
1840f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1841f4259b30SLisandro Dalcin                                        NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
1843f4259b30SLisandro Dalcin                                        NULL,
1844f4259b30SLisandro Dalcin                                        NULL,
1845d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1846f4259b30SLisandro Dalcin                                        NULL,
184728d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1848f4259b30SLisandro Dalcin                                        NULL,
1849f4259b30SLisandro Dalcin                                        NULL,
1850f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1851f4259b30SLisandro Dalcin                                        NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        NULL,
1855f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1856f4259b30SLisandro Dalcin                                        NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
18595bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1860f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1861f4259b30SLisandro Dalcin                                        NULL,
1862f4259b30SLisandro Dalcin                                        NULL,
1863f4259b30SLisandro Dalcin                                        NULL,
1864f4259b30SLisandro Dalcin                                        NULL,
1865f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1866f4259b30SLisandro Dalcin                                        NULL,
1867f4259b30SLisandro Dalcin                                        NULL,
1868f4259b30SLisandro Dalcin                                        NULL,
1869f4259b30SLisandro Dalcin                                        NULL,
1870f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1871f4259b30SLisandro Dalcin                                        NULL,
1872f4259b30SLisandro Dalcin                                        NULL,
1873f4259b30SLisandro Dalcin                                        NULL,
1874f4259b30SLisandro Dalcin                                        NULL,
1875f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1876f4259b30SLisandro Dalcin                                        NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
18782726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1879f4259b30SLisandro Dalcin                                        NULL,
1880f4259b30SLisandro Dalcin                                        /*104*/ NULL,
188199cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1882d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1883d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
188495936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1885f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1886f4259b30SLisandro Dalcin                                        NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
18893b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1890f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1891f4259b30SLisandro Dalcin                                        NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        NULL,
1895f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1896f4259b30SLisandro Dalcin                                        NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
1899f4259b30SLisandro Dalcin                                        NULL,
1900f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1901f4259b30SLisandro Dalcin                                        NULL,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904f4259b30SLisandro Dalcin                                        NULL,
1905f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1906f4259b30SLisandro Dalcin                                        NULL,
1907f4259b30SLisandro Dalcin                                        NULL,
1908f4259b30SLisandro Dalcin                                        NULL,
1909f4259b30SLisandro Dalcin                                        NULL,
1910f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1911f4259b30SLisandro Dalcin                                        NULL,
1912f4259b30SLisandro Dalcin                                        NULL,
1913f4259b30SLisandro Dalcin                                        NULL,
1914f4259b30SLisandro Dalcin                                        NULL,
191546533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1916f4259b30SLisandro Dalcin                                        NULL,
1917f4259b30SLisandro Dalcin                                        NULL,
1918f4259b30SLisandro Dalcin                                        NULL,
1919f4259b30SLisandro Dalcin                                        NULL,
1920d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1921d70f29a3SPierre Jolivet                                        NULL,
1922d70f29a3SPierre Jolivet                                        NULL,
192399a7f59eSMark Adams                                        NULL,
192499a7f59eSMark Adams                                        NULL,
19257fb60732SBarry Smith                                        NULL,
1926dec0b466SHong Zhang                                        /*150*/ NULL,
1927*17ea310bSPierre Jolivet                                        MatEliminateZeros_MPISBAIJ};
1928a30f8f8cSSatish Balay 
1929d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1930d71ae5a4SJacob Faibussowitsch {
1931476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1932535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19335d2a9ed1SStefano Zampini   PetscMPIInt   size;
1934a23d5eceSKris Buschelman 
1935a23d5eceSKris Buschelman   PetscFunctionBegin;
1936ad79cf63SBarry Smith   if (B->hash_active) {
1937aea10558SJacob Faibussowitsch     B->ops[0]      = b->cops;
1938ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
1939ad79cf63SBarry Smith   }
1940ad79cf63SBarry Smith   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
19419566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19429566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19439566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19449566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19455f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19465f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1947899cda47SBarry Smith 
1948d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1949d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19505f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1951a23d5eceSKris Buschelman 
1952d0f46423SBarry Smith   B->rmap->bs = bs;
1953a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1954a23d5eceSKris Buschelman   b->mbs      = mbs;
1955a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1956de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1957de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1958a23d5eceSKris Buschelman 
1959ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1960d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1961d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1962a23d5eceSKris Buschelman 
1963d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1964d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1965a23d5eceSKris Buschelman 
1966cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
1967eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
1968cb7b82ddSBarry Smith #else
19699566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1970cb7b82ddSBarry Smith #endif
19719566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19729566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19739566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19749566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19759566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19769566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19779566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19789566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19799566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1980cb7b82ddSBarry Smith 
19819566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19829566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19839566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19849566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19859566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1986cb7b82ddSBarry Smith 
1987ad79cf63SBarry Smith   PetscCall(MatDestroy(&b->A));
19889566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19899566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19909566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1991a23d5eceSKris Buschelman 
19929566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19939566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
199426fbe8dcSKarl Rupp 
1995526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1996cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1997cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
19983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1999a23d5eceSKris Buschelman }
2000a23d5eceSKris Buschelman 
2001d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
2002d71ae5a4SJacob Faibussowitsch {
200302106b30SBarry Smith   PetscInt        m, rstart, cend;
2004f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2005f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
2006f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
2007bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
20083bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
2009dfb205c3SBarry Smith 
2010dfb205c3SBarry Smith   PetscFunctionBegin;
20115f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
20129566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
20139566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
20149566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
20159566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
20169566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2017dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2018dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2019dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2020dfb205c3SBarry Smith 
20215f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20229566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2023dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2024dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20255f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20260cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2027dfb205c3SBarry Smith     JJ = jj + ii[i];
20280cd7f59aSBarry Smith     bd = 0;
2029dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20300cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2031dfb205c3SBarry Smith       JJ++;
20320cd7f59aSBarry Smith       bd++;
2033dfb205c3SBarry Smith     }
2034dfb205c3SBarry Smith     d = 0;
2035dfb205c3SBarry Smith     for (; j < nz; j++) {
2036dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2037dfb205c3SBarry Smith       d++;
2038dfb205c3SBarry Smith     }
2039dfb205c3SBarry Smith     d_nnz[i] = d;
20400cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20410cd7f59aSBarry Smith     nz       = nz - bd;
20420cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2043dfb205c3SBarry Smith   }
20449566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20459566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20469566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2047dfb205c3SBarry Smith 
2048dfb205c3SBarry Smith   values = (PetscScalar *)V;
204948a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2050dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2051dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2052dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2053dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2054bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2055dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20569566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2057bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2058bb80cfbbSStefano Zampini       PetscInt j;
20590cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20600cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20619566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20620cd7f59aSBarry Smith       }
20630cd7f59aSBarry Smith     }
2064dfb205c3SBarry Smith   }
2065dfb205c3SBarry Smith 
20669566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20673bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20683bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20709566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20713bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20723bd0feecSPierre Jolivet 
20739566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
20743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2075dfb205c3SBarry Smith }
2076dfb205c3SBarry Smith 
20770bad9183SKris Buschelman /*MC
2078fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2079828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2080828413b8SBarry Smith    the matrix is stored.
2081828413b8SBarry Smith 
2082828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
208311a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20840bad9183SKris Buschelman 
20852ef1f0ffSBarry Smith    Options Database Key:
208611a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20870bad9183SKris Buschelman 
20882ef1f0ffSBarry Smith    Level: beginner
20892ef1f0ffSBarry Smith 
209011a5261eSBarry Smith    Note:
2091476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2092476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2093476417e5SBarry Smith 
20941cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20950bad9183SKris Buschelman M*/
20960bad9183SKris Buschelman 
2097d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2098d71ae5a4SJacob Faibussowitsch {
2099b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
210094ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2101b5df2d14SHong Zhang 
2102b5df2d14SHong Zhang   PetscFunctionBegin;
21034dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2104b0a32e0cSBarry Smith   B->data   = (void *)b;
2105aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
2106b5df2d14SHong Zhang 
2107b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2108b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2109b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2110b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
211126fbe8dcSKarl Rupp 
21129566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
21139566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2114b5df2d14SHong Zhang 
2115b5df2d14SHong Zhang   /* build local table of row and column ownerships */
21169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2117b5df2d14SHong Zhang 
2118b5df2d14SHong Zhang   /* build cache for off array entries formed */
21199566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
212026fbe8dcSKarl Rupp 
2121b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21220298fd71SBarry Smith   b->colmap      = NULL;
21230298fd71SBarry Smith   b->garray      = NULL;
2124b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2125b5df2d14SHong Zhang 
2126b5df2d14SHong Zhang   /* stuff used in block assembly */
2127f4259b30SLisandro Dalcin   b->barray = NULL;
2128b5df2d14SHong Zhang 
2129b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2130f4259b30SLisandro Dalcin   b->lvec    = NULL;
2131f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2132f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2133f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2134f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2135f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2136f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2137f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2138b5df2d14SHong Zhang 
2139b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2140f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2141f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2142b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2143b5df2d14SHong Zhang 
2144b5df2d14SHong Zhang   /* hash table stuff */
2145f4259b30SLisandro Dalcin   b->ht           = NULL;
2146f4259b30SLisandro Dalcin   b->hd           = NULL;
2147b5df2d14SHong Zhang   b->ht_size      = 0;
2148b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2149b5df2d14SHong Zhang   b->ht_fact      = 0;
2150b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2151b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2152b5df2d14SHong Zhang 
21537dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21547a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21557a868f3eSHong Zhang 
2156f4259b30SLisandro Dalcin   b->in_loc = NULL;
2157f4259b30SLisandro Dalcin   b->v_loc  = NULL;
215859ffdab8SBarry Smith   b->n_loc  = 0;
215994ae4db5SBarry Smith 
21609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21629566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21639566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21646214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21666214f412SHong Zhang #endif
2167d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21689566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2169d24d4204SJose E. Roman #endif
21709566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21719566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2172aa5a9175SDahai Guo 
2173b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2174b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2175b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2176b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2177eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2178b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2179eb1ec7c1SStefano Zampini #else
2180b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2181eb1ec7c1SStefano Zampini #endif
218213647f61SHong Zhang 
21839566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2184d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21859566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
218694ae4db5SBarry Smith   if (flg) {
218794ae4db5SBarry Smith     PetscReal fact = 1.39;
21889566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21899566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
219094ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21919566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21929566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
219394ae4db5SBarry Smith   }
2194d0609cedSBarry Smith   PetscOptionsEnd();
21953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2196b5df2d14SHong Zhang }
2197b5df2d14SHong Zhang 
21982920cce0SJacob Faibussowitsch // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2199209238afSKris Buschelman /*MC
2200002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2201209238afSKris Buschelman 
220211a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
220311a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2204209238afSKris Buschelman 
220511a5261eSBarry Smith    Options Database Key:
2206c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2207209238afSKris Buschelman 
2208209238afSKris Buschelman   Level: beginner
2209209238afSKris Buschelman 
22101cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2211209238afSKris Buschelman M*/
2212209238afSKris Buschelman 
2213b5df2d14SHong Zhang /*@C
2214b5df2d14SHong Zhang   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2215b5df2d14SHong Zhang   the user should preallocate the matrix storage by setting the parameters
2216b5df2d14SHong Zhang   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2217b5df2d14SHong Zhang   performance can be increased by more than a factor of 50.
2218b5df2d14SHong Zhang 
2219c3339decSBarry Smith   Collective
2220b5df2d14SHong Zhang 
2221b5df2d14SHong Zhang   Input Parameters:
22221c4f3114SJed Brown + B     - the matrix
2223bb7ae925SBarry Smith . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2224bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2225b5df2d14SHong Zhang . d_nz  - number of block nonzeros per block row in diagonal portion of local
2226b5df2d14SHong Zhang            submatrix  (same for all local rows)
2227b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows
22286d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22292ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
223095742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2231b5df2d14SHong Zhang . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2232b5df2d14SHong Zhang            submatrix (same for all local rows).
2233b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the
2234c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22352ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.
2236b5df2d14SHong Zhang 
2237b5df2d14SHong Zhang   Options Database Keys:
2238a2b725a8SWilliam Gropp + -mat_no_unroll  - uses code that does not unroll the loops in the
2239b5df2d14SHong Zhang                      block calculations (much slower)
2240a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
2241b5df2d14SHong Zhang 
22422ef1f0ffSBarry Smith   Level: intermediate
22432ef1f0ffSBarry Smith 
2244b5df2d14SHong Zhang   Notes:
2245b5df2d14SHong Zhang 
224611a5261eSBarry Smith   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2247b5df2d14SHong Zhang   than it must be used on all processors that share the object for that argument.
2248b5df2d14SHong Zhang 
224949a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
225049a6f317SBarry Smith 
2251b5df2d14SHong Zhang   Storage Information:
2252b5df2d14SHong Zhang   For a square global matrix we define each processor's diagonal portion
2253b5df2d14SHong Zhang   to be its local rows and the corresponding columns (a square submatrix);
2254b5df2d14SHong Zhang   each processor's off-diagonal portion encompasses the remainder of the
2255b5df2d14SHong Zhang   local matrix (a rectangular submatrix).
2256b5df2d14SHong Zhang 
2257b5df2d14SHong Zhang   The user can specify preallocated storage for the diagonal part of
22582ef1f0ffSBarry Smith   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
22592ef1f0ffSBarry Smith   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2260b5df2d14SHong Zhang   memory allocation.  Likewise, specify preallocated storage for the
22612ef1f0ffSBarry Smith   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2262b5df2d14SHong Zhang 
226311a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2264aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
22652ef1f0ffSBarry Smith   You can also run with the option `-info` and look for messages with the string
2266aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
2267aa95bbe8SBarry Smith 
2268b5df2d14SHong Zhang   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2269b5df2d14SHong Zhang   the figure below we depict these three local rows and all columns (0-11).
2270b5df2d14SHong Zhang 
2271b5df2d14SHong Zhang .vb
2272b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2273a4b1a0f6SJed Brown           --------------------------
2274c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2275c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2276c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2277a4b1a0f6SJed Brown           --------------------------
2278b5df2d14SHong Zhang .ve
2279b5df2d14SHong Zhang 
2280b5df2d14SHong Zhang   Thus, any entries in the d locations are stored in the d (diagonal)
2281b5df2d14SHong Zhang   submatrix, and any entries in the o locations are stored in the
22826d10fdaeSSatish Balay   o (off-diagonal) submatrix.  Note that the d matrix is stored in
228311a5261eSBarry Smith   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2284b5df2d14SHong Zhang 
22852ef1f0ffSBarry Smith   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
22866d10fdaeSSatish Balay   plus the diagonal part of the d matrix,
22872ef1f0ffSBarry Smith   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2288c2fc9fa9SBarry Smith 
2289b5df2d14SHong Zhang   In general, for PDE problems in which most nonzeros are near the diagonal,
22902ef1f0ffSBarry Smith   one expects `d_nz` >> `o_nz`.
2291b5df2d14SHong Zhang 
22921cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2293b5df2d14SHong Zhang @*/
2294d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2295d71ae5a4SJacob Faibussowitsch {
2296b5df2d14SHong Zhang   PetscFunctionBegin;
22976ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22986ba663aaSJed Brown   PetscValidType(B, 1);
22996ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2300cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
23013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2302b5df2d14SHong Zhang }
2303b5df2d14SHong Zhang 
23042920cce0SJacob Faibussowitsch // PetscClangLinter pragma disable: -fdoc-section-header-unknown
2305a30f8f8cSSatish Balay /*@C
230611a5261eSBarry Smith   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2307a30f8f8cSSatish Balay   (block compressed row).  For good matrix assembly performance
2308a30f8f8cSSatish Balay   the user should preallocate the matrix storage by setting the parameters
230920f4b53cSBarry Smith   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2310a30f8f8cSSatish Balay 
2311d083f849SBarry Smith   Collective
2312a30f8f8cSSatish Balay 
2313a30f8f8cSSatish Balay   Input Parameters:
2314a30f8f8cSSatish Balay + comm  - MPI communicator
231511a5261eSBarry Smith . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
231620f4b53cSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
231720f4b53cSBarry Smith . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2318a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2319a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
232020f4b53cSBarry Smith . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2321a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2322a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
232320f4b53cSBarry Smith . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
232420f4b53cSBarry Smith . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2325a30f8f8cSSatish Balay . d_nz  - number of block nonzeros per block row in diagonal portion of local
2326a30f8f8cSSatish Balay            submatrix (same for all local rows)
2327a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows
23286d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23292ef1f0ffSBarry Smith            (possibly different for each block block row) or `NULL`.
233095742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
233195742e49SBarry Smith            set its value even if it is zero.
2332a30f8f8cSSatish Balay . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2333a30f8f8cSSatish Balay            submatrix (same for all local rows).
2334a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the
2335a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23362ef1f0ffSBarry Smith            each block row) or `NULL`.
2337a30f8f8cSSatish Balay 
2338a30f8f8cSSatish Balay   Output Parameter:
2339a30f8f8cSSatish Balay . A - the matrix
2340a30f8f8cSSatish Balay 
2341a30f8f8cSSatish Balay   Options Database Keys:
2342a2b725a8SWilliam Gropp + -mat_no_unroll  - uses code that does not unroll the loops in the
2343a30f8f8cSSatish Balay                      block calculations (much slower)
2344a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use
2345a2b725a8SWilliam Gropp - -mat_mpi        - use the parallel matrix data structures even on one processor
2346a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2347a30f8f8cSSatish Balay 
23482ef1f0ffSBarry Smith   Level: intermediate
23492ef1f0ffSBarry Smith 
23502ef1f0ffSBarry Smith   Notes:
235111a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2352f6f02116SRichard Tran Mills   MatXXXXSetPreallocation() paradigm instead of this routine directly.
235311a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2354175b88e8SBarry Smith 
2355d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
23566d6d819aSHong Zhang   This matrix type does not support complex Hermitian operation.
2357d1be2dadSMatthew Knepley 
2358a30f8f8cSSatish Balay   The user MUST specify either the local or global matrix dimensions
2359a30f8f8cSSatish Balay   (possibly both).
2360a30f8f8cSSatish Balay 
236111a5261eSBarry Smith   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2362a30f8f8cSSatish Balay   than it must be used on all processors that share the object for that argument.
2363a30f8f8cSSatish Balay 
236449a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
236549a6f317SBarry Smith 
2366a30f8f8cSSatish Balay   Storage Information:
2367a30f8f8cSSatish Balay   For a square global matrix we define each processor's diagonal portion
2368a30f8f8cSSatish Balay   to be its local rows and the corresponding columns (a square submatrix);
2369a30f8f8cSSatish Balay   each processor's off-diagonal portion encompasses the remainder of the
2370a30f8f8cSSatish Balay   local matrix (a rectangular submatrix).
2371a30f8f8cSSatish Balay 
2372a30f8f8cSSatish Balay   The user can specify preallocated storage for the diagonal part of
23732ef1f0ffSBarry Smith   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
23742ef1f0ffSBarry Smith   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2375a30f8f8cSSatish Balay   memory allocation. Likewise, specify preallocated storage for the
23762ef1f0ffSBarry Smith   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2377a30f8f8cSSatish Balay 
2378a30f8f8cSSatish Balay   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2379a30f8f8cSSatish Balay   the figure below we depict these three local rows and all columns (0-11).
2380a30f8f8cSSatish Balay 
2381a30f8f8cSSatish Balay .vb
2382a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2383a4b1a0f6SJed Brown           --------------------------
2384c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2385c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2386c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2387a4b1a0f6SJed Brown           --------------------------
2388a30f8f8cSSatish Balay .ve
2389a30f8f8cSSatish Balay 
2390a30f8f8cSSatish Balay   Thus, any entries in the d locations are stored in the d (diagonal)
2391a30f8f8cSSatish Balay   submatrix, and any entries in the o locations are stored in the
23926d10fdaeSSatish Balay   o (off-diagonal) submatrix. Note that the d matrix is stored in
23932ef1f0ffSBarry Smith   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2394a30f8f8cSSatish Balay 
23952ef1f0ffSBarry Smith   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
23966d10fdaeSSatish Balay   plus the diagonal part of the d matrix,
23972ef1f0ffSBarry Smith   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2398a30f8f8cSSatish Balay   In general, for PDE problems in which most nonzeros are near the diagonal,
23992ef1f0ffSBarry Smith   one expects `d_nz` >> `o_nz`.
2400a30f8f8cSSatish Balay 
24011cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2402a30f8f8cSSatish Balay @*/
2403d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2404d71ae5a4SJacob Faibussowitsch {
24051302d50aSBarry Smith   PetscMPIInt size;
2406a30f8f8cSSatish Balay 
2407a30f8f8cSSatish Balay   PetscFunctionBegin;
24089566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
24099566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
24109566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2411273d9f13SBarry Smith   if (size > 1) {
24129566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
24139566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2414273d9f13SBarry Smith   } else {
24159566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
24169566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2417273d9f13SBarry Smith   }
24183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2419a30f8f8cSSatish Balay }
2420a30f8f8cSSatish Balay 
2421d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2422d71ae5a4SJacob Faibussowitsch {
2423a30f8f8cSSatish Balay   Mat           mat;
2424a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2425d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2426387bc808SHong Zhang   PetscScalar  *array;
2427a30f8f8cSSatish Balay 
2428a30f8f8cSSatish Balay   PetscFunctionBegin;
2429f4259b30SLisandro Dalcin   *newmat = NULL;
243026fbe8dcSKarl Rupp 
24319566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24329566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24339566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2436e1b6402fSHong Zhang 
2437d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2438273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
243982327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24407fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24417fff6886SHong Zhang 
2442b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2443a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2444a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2445a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2446a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2447a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2448a30f8f8cSSatish Balay 
2449a30f8f8cSSatish Balay   a->size         = oldmat->size;
2450a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2451a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2452a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2453f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2454f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2455a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2456f4259b30SLisandro Dalcin   a->barray       = NULL;
2457899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2458899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2459899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2460899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2461a30f8f8cSSatish Balay 
2462a30f8f8cSSatish Balay   /* hash table stuff */
2463f4259b30SLisandro Dalcin   a->ht           = NULL;
2464f4259b30SLisandro Dalcin   a->hd           = NULL;
2465a30f8f8cSSatish Balay   a->ht_size      = 0;
2466a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2467a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2468a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2469a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2470a30f8f8cSSatish Balay 
24719566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2472a30f8f8cSSatish Balay   if (oldmat->colmap) {
2473a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2474eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2475a30f8f8cSSatish Balay #else
24769566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24779566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2478a30f8f8cSSatish Balay #endif
2479f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2480387bc808SHong Zhang 
2481a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24829566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24839566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2484f4259b30SLisandro Dalcin   } else a->garray = NULL;
2485a30f8f8cSSatish Balay 
24869566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24879566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24889566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
248982327fa8SHong Zhang 
24909566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24919566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2492387bc808SHong Zhang 
24939566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24949566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24959566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24969566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24979566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24989566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24999566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
25009566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2501387bc808SHong Zhang 
2502387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
25039566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2504387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
250582327fa8SHong Zhang 
25069566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
25079566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
25089566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2509a30f8f8cSSatish Balay   *newmat = mat;
25103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2511a30f8f8cSSatish Balay }
2512a30f8f8cSSatish Balay 
2513618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2514618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2515618cc2edSLisandro Dalcin 
2516d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2517d71ae5a4SJacob Faibussowitsch {
25187f489da9SVaclav Hapla   PetscBool isbinary;
251995936485SShri Abhyankar 
252095936485SShri Abhyankar   PetscFunctionBegin;
25219566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25225f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25239566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
25243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
252595936485SShri Abhyankar }
252695936485SShri Abhyankar 
2527d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2528d71ae5a4SJacob Faibussowitsch {
252924d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2530f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2531ca54ac64SHong Zhang   PetscReal     atmp;
253287828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25331302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25341302d50aSBarry Smith   PetscMPIInt   rank, size;
25351302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
253687828ca2SBarry Smith   PetscScalar  *va;
25378a1c53f2SBarry Smith   MatScalar    *ba;
2538f4c0e9e4SHong Zhang   MPI_Status    stat;
253924d5174aSHong Zhang 
254024d5174aSHong Zhang   PetscFunctionBegin;
25415f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25429566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25439566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2544f4c0e9e4SHong Zhang 
25459566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25469566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2547f4c0e9e4SHong Zhang 
2548d0f46423SBarry Smith   bs  = A->rmap->bs;
2549f4c0e9e4SHong Zhang   mbs = a->mbs;
2550f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2551f4c0e9e4SHong Zhang   ba  = b->a;
2552f4c0e9e4SHong Zhang   bi  = b->i;
2553f4c0e9e4SHong Zhang   bj  = b->j;
2554f4c0e9e4SHong Zhang 
2555f4c0e9e4SHong Zhang   /* find ownerships */
2556d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2557f4c0e9e4SHong Zhang 
2558f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25599566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2560f4c0e9e4SHong Zhang 
2561f4c0e9e4SHong Zhang   /* row_max for B */
2562b8475685SHong Zhang   if (rank != size - 1) {
2563f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25649371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25659371c9d4SSatish Balay       bi++;
2566f4c0e9e4SHong Zhang       brow = bs * i;
2567f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2568f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2569f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2570ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
257104d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2572f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25739371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25749371c9d4SSatish Balay             ba++;
2575ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2576ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2577f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2578f4c0e9e4SHong Zhang           }
2579f4c0e9e4SHong Zhang         }
2580f4c0e9e4SHong Zhang         bj++;
2581f4c0e9e4SHong Zhang       }
2582f4c0e9e4SHong Zhang     }
2583f4c0e9e4SHong Zhang 
2584f4c0e9e4SHong Zhang     /* send values to its owners */
2585f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2586f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2587ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25889566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2589ca54ac64SHong Zhang     }
2590f4c0e9e4SHong Zhang   }
2591f4c0e9e4SHong Zhang 
2592f4c0e9e4SHong Zhang   /* receive values */
2593ca54ac64SHong Zhang   if (rank) {
2594f4c0e9e4SHong Zhang     rvalues = work;
2595ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2596f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25979566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2598f4c0e9e4SHong Zhang       /* process values */
2599f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2600ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2601f4c0e9e4SHong Zhang       }
2602f4c0e9e4SHong Zhang     }
2603ca54ac64SHong Zhang   }
2604f4c0e9e4SHong Zhang 
26059566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
26069566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
26073ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
260824d5174aSHong Zhang }
26092798e883SHong Zhang 
2610d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2611d71ae5a4SJacob Faibussowitsch {
26122798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2613d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
26143649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2615ffe4fb16SHong Zhang   Vec                bb1;
26163649974fSBarry Smith   const PetscScalar *b;
2617ffe4fb16SHong Zhang 
2618ffe4fb16SHong Zhang   PetscFunctionBegin;
26195f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26205f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2621ffe4fb16SHong Zhang 
2622a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26239566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
26243ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2625a2b30743SBarry Smith   }
2626a2b30743SBarry Smith 
2627ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2628ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26299566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2630ffe4fb16SHong Zhang       its--;
2631ffe4fb16SHong Zhang     }
2632ffe4fb16SHong Zhang 
26339566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2634ffe4fb16SHong Zhang     while (its--) {
2635ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26369566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2637ffe4fb16SHong Zhang 
2638ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26399566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26409566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26419566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26429566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2643ffe4fb16SHong Zhang 
26449566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2645ffe4fb16SHong Zhang 
2646ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26479566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26489566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26499566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26509566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2651ffe4fb16SHong Zhang 
2652ffe4fb16SHong Zhang       /* set slvec1b = 0 */
2653629a200eSBarry Smith       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2654629a200eSBarry Smith       PetscCall(VecZeroEntries(mat->slvec1b));
2655ffe4fb16SHong Zhang 
26569566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26579566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26589566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26599566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2660ffe4fb16SHong Zhang 
2661ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26629566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2663ffe4fb16SHong Zhang 
2664ffe4fb16SHong Zhang       /* local diagonal sweep */
26659566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2666ffe4fb16SHong Zhang     }
26679566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2668fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26699566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2670fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26719566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2672fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2673fa22f6d0SBarry Smith     Vec                xx1;
2674ace3abfcSBarry Smith     PetscBool          hasop;
267520f1ed55SBarry Smith     const PetscScalar *diag;
2676887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
267720f1ed55SBarry Smith     PetscInt           i, n;
2678fa22f6d0SBarry Smith 
2679fa22f6d0SBarry Smith     if (!mat->xx1) {
26809566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26819566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2682fa22f6d0SBarry Smith     }
2683fa22f6d0SBarry Smith     xx1 = mat->xx1;
2684fa22f6d0SBarry Smith     bb1 = mat->bb1;
2685fa22f6d0SBarry Smith 
26869566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2687fa22f6d0SBarry Smith 
2688fa22f6d0SBarry Smith     if (!mat->diag) {
2689effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26909566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26919566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2692fa22f6d0SBarry Smith     }
26939566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2694fa22f6d0SBarry Smith 
2695fa22f6d0SBarry Smith     if (hasop) {
26969566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26979566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
269820f1ed55SBarry Smith     } else {
269920f1ed55SBarry Smith       /*
270020f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
27019566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
27029566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
270320f1ed55SBarry Smith       */
27049566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
27059566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
27069566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
27079566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
27089566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2709887ee2caSBarry Smith       if (omega == 1.0) {
271026fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
27119566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2712887ee2caSBarry Smith       } else {
271326fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
27149566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2715887ee2caSBarry Smith       }
27169566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
27179566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27189566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27199566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
272020f1ed55SBarry Smith     }
2721fa22f6d0SBarry Smith 
2722fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
2723629a200eSBarry Smith     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2724629a200eSBarry Smith     PetscCall(VecZeroEntries(mat->slvec1b));
27259566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27269566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27279566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27289566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27299566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27309566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27319566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27329566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27339566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2734fa22f6d0SBarry Smith 
2735fa22f6d0SBarry Smith     /* local sweep */
27369566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27379566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2738f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
27393ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2740ffe4fb16SHong Zhang }
2741ffe4fb16SHong Zhang 
2742dfb205c3SBarry Smith /*@
274311a5261eSBarry Smith   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2744dfb205c3SBarry Smith   CSR format the local rows.
2745dfb205c3SBarry Smith 
2746d083f849SBarry Smith   Collective
2747dfb205c3SBarry Smith 
2748dfb205c3SBarry Smith   Input Parameters:
2749dfb205c3SBarry Smith + comm - MPI communicator
2750dfb205c3SBarry Smith . bs   - the block size, only a block size of 1 is supported
275111a5261eSBarry Smith . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2752dfb205c3SBarry Smith . n    - This value should be the same as the local size used in creating the
275311a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
27542ef1f0ffSBarry Smith        calculated if `N` is given) For square matrices `n` is almost always `m`.
27552ef1f0ffSBarry Smith . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
27562ef1f0ffSBarry Smith . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2757483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2758dfb205c3SBarry Smith . j    - column indices
2759dfb205c3SBarry Smith - a    - matrix values
2760dfb205c3SBarry Smith 
2761dfb205c3SBarry Smith   Output Parameter:
2762dfb205c3SBarry Smith . mat - the matrix
2763dfb205c3SBarry Smith 
2764dfb205c3SBarry Smith   Level: intermediate
2765dfb205c3SBarry Smith 
2766dfb205c3SBarry Smith   Notes:
27672ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
27682ef1f0ffSBarry Smith   thus you CANNOT change the matrix entries by changing the values of `a` after you have
27692ef1f0ffSBarry Smith   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2770dfb205c3SBarry Smith 
27712ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2772dfb205c3SBarry Smith 
27731cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2774db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2775dfb205c3SBarry Smith @*/
2776d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2777d71ae5a4SJacob Faibussowitsch {
2778dfb205c3SBarry Smith   PetscFunctionBegin;
27795f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27805f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27819566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27829566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27839566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27849566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
27853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2786dfb205c3SBarry Smith }
2787dfb205c3SBarry Smith 
2788dfb205c3SBarry Smith /*@C
278911a5261eSBarry Smith   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2790dfb205c3SBarry Smith 
2791d083f849SBarry Smith   Collective
2792dfb205c3SBarry Smith 
2793dfb205c3SBarry Smith   Input Parameters:
27941c4f3114SJed Brown + B  - the matrix
2795dfb205c3SBarry Smith . bs - the block size
27962ef1f0ffSBarry Smith . i  - the indices into `j` for the start of each local row (starts with zero)
2797dfb205c3SBarry Smith . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2798dfb205c3SBarry Smith - v  - optional values in the matrix
2799dfb205c3SBarry Smith 
2800664954b6SBarry Smith   Level: advanced
2801664954b6SBarry Smith 
2802664954b6SBarry Smith   Notes:
28030cd7f59aSBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
28040cd7f59aSBarry Smith   and usually the numerical values as well
28050cd7f59aSBarry Smith 
280650c5228eSBarry Smith   Any entries below the diagonal are ignored
2807dfb205c3SBarry Smith 
28081cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2809dfb205c3SBarry Smith @*/
2810d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2811d71ae5a4SJacob Faibussowitsch {
2812dfb205c3SBarry Smith   PetscFunctionBegin;
2813cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
28143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2815dfb205c3SBarry Smith }
2816dfb205c3SBarry Smith 
2817d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2818d71ae5a4SJacob Faibussowitsch {
281910c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
282010c56fdeSHong Zhang   PetscInt    *indx;
282110c56fdeSHong Zhang   PetscScalar *values;
2822dfb205c3SBarry Smith 
28234dcd73b1SHong Zhang   PetscFunctionBegin;
28249566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
282510c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
282610c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2827de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
282810c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2829de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28304dcd73b1SHong Zhang 
28319566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28329371c9d4SSatish Balay     mbs = m / bs;
28339371c9d4SSatish Balay     Nbs = N / cbs;
283448a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2835da91a574SPierre Jolivet     nbs = n / cbs;
28364dcd73b1SHong Zhang 
28379566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2838d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2839de25e9cbSPierre Jolivet 
28409566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28419566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2842de25e9cbSPierre Jolivet     if (rank == size - 1) {
2843de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28445f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2845de25e9cbSPierre Jolivet     }
2846de25e9cbSPierre Jolivet 
2847d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28489566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
284910c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28509566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28514dcd73b1SHong Zhang       nnz = nnz / bs;
28524dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28539566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28549566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28554dcd73b1SHong Zhang     }
28569566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28579566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28584dcd73b1SHong Zhang 
28599566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28609566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28619566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28629566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28639566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28649566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2865d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28664dcd73b1SHong Zhang   }
28674dcd73b1SHong Zhang 
286810c56fdeSHong Zhang   /* numeric phase */
28699566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28709566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28714dcd73b1SHong Zhang 
28729566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28734dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28749566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28754dcd73b1SHong Zhang     Ii = i + rstart;
28769566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28779566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28784dcd73b1SHong Zhang   }
28799566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28809566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28819566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28834dcd73b1SHong Zhang }
2884