xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a8f51744601f91dc30d5f98153b1ecd91eebb0e5)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
2c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
5a30f8f8cSSatish Balay 
626cec326SBarry Smith PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
726cec326SBarry Smith {
826cec326SBarry Smith   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
926cec326SBarry Smith 
1026cec326SBarry Smith   PetscFunctionBegin;
1126cec326SBarry Smith #if defined(PETSC_USE_LOG)
1226cec326SBarry Smith   PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
1326cec326SBarry Smith #endif
1426cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->stash));
1526cec326SBarry Smith   PetscCall(MatStashDestroy_Private(&mat->bstash));
1626cec326SBarry Smith   PetscCall(MatDestroy(&baij->A));
1726cec326SBarry Smith   PetscCall(MatDestroy(&baij->B));
1826cec326SBarry Smith #if defined(PETSC_USE_CTABLE)
1926cec326SBarry Smith   PetscCall(PetscHMapIDestroy(&baij->colmap));
2026cec326SBarry Smith #else
2126cec326SBarry Smith   PetscCall(PetscFree(baij->colmap));
2226cec326SBarry Smith #endif
2326cec326SBarry Smith   PetscCall(PetscFree(baij->garray));
2426cec326SBarry Smith   PetscCall(VecDestroy(&baij->lvec));
2526cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->Mvctx));
2626cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0));
2726cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec0b));
2826cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1));
2926cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1a));
3026cec326SBarry Smith   PetscCall(VecDestroy(&baij->slvec1b));
3126cec326SBarry Smith   PetscCall(VecScatterDestroy(&baij->sMvctx));
3226cec326SBarry Smith   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
3326cec326SBarry Smith   PetscCall(PetscFree(baij->barray));
3426cec326SBarry Smith   PetscCall(PetscFree(baij->hd));
3526cec326SBarry Smith   PetscCall(VecDestroy(&baij->diag));
3626cec326SBarry Smith   PetscCall(VecDestroy(&baij->bb1));
3726cec326SBarry Smith   PetscCall(VecDestroy(&baij->xx1));
3826cec326SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
3926cec326SBarry Smith   PetscCall(PetscFree(baij->setvaluescopy));
4026cec326SBarry Smith #endif
4126cec326SBarry Smith   PetscCall(PetscFree(baij->in_loc));
4226cec326SBarry Smith   PetscCall(PetscFree(baij->v_loc));
4326cec326SBarry Smith   PetscCall(PetscFree(baij->rangebs));
4426cec326SBarry Smith   PetscCall(PetscFree(mat->data));
4526cec326SBarry Smith 
4626cec326SBarry Smith   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
4726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
4826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
4926cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
5026cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
5126cec326SBarry Smith #if defined(PETSC_HAVE_ELEMENTAL)
5226cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
5326cec326SBarry Smith #endif
5426cec326SBarry Smith #if defined(PETSC_HAVE_SCALAPACK)
5526cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
5626cec326SBarry Smith #endif
5726cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
5826cec326SBarry Smith   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
5926cec326SBarry Smith   PetscFunctionReturn(PETSC_SUCCESS);
6026cec326SBarry Smith }
6126cec326SBarry Smith 
6226cec326SBarry Smith /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
6326cec326SBarry Smith #define TYPE SBAIJ
6426cec326SBarry Smith #define TYPE_SBAIJ
6526cec326SBarry Smith #include "../src/mat/impls/aij/mpi/mpihashmat.h"
6626cec326SBarry Smith #undef TYPE
6726cec326SBarry Smith #undef TYPE_SBAIJ
6826cec326SBarry Smith 
696214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
70cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
716214f412SHong Zhang #endif
72d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
73d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74d24d4204SJose E. Roman #endif
75b147fbf3SStefano Zampini 
76b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
77d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78d71ae5a4SJacob Faibussowitsch {
79b147fbf3SStefano Zampini   Mat       preallocator;
80b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
81b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
82b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
83b147fbf3SStefano Zampini 
84b147fbf3SStefano Zampini   PetscFunctionBegin;
85b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
86b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
87b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
88b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
899566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
905f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91b147fbf3SStefano Zampini   }
92b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
939566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
949566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
959566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
969566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
979566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
989566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
999566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
1009566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
1019566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
103b147fbf3SStefano Zampini     PetscInt           ncols;
104b147fbf3SStefano Zampini     const PetscInt    *row;
105b147fbf3SStefano Zampini     const PetscScalar *vals;
106b147fbf3SStefano Zampini 
107b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
1089566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
1099566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
11048a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
1119566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112b147fbf3SStefano Zampini     }
113b147fbf3SStefano Zampini   }
1149566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
1159566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
1169566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
1179566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
1183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
119b147fbf3SStefano Zampini }
120b147fbf3SStefano Zampini 
121d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122d71ae5a4SJacob Faibussowitsch {
123b147fbf3SStefano Zampini   Mat      B;
124b147fbf3SStefano Zampini   PetscInt r;
125b147fbf3SStefano Zampini 
126b147fbf3SStefano Zampini   PetscFunctionBegin;
127b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
12828d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
129b147fbf3SStefano Zampini     PetscInt  bs;
130b147fbf3SStefano Zampini 
1319566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
1329566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
1339566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
1349566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
1359566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
1369566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
1379566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
1389566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
13928d58a37SPierre Jolivet     if (!isdense) {
1409566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
1419566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
1429566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
14328d58a37SPierre Jolivet     } else {
1449566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
14528d58a37SPierre Jolivet     }
14628d58a37SPierre Jolivet   } else {
14728d58a37SPierre Jolivet     B = *newmat;
1489566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
14928d58a37SPierre Jolivet   }
150b147fbf3SStefano Zampini 
1519566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
152b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153b147fbf3SStefano Zampini     PetscInt           ncols;
154b147fbf3SStefano Zampini     const PetscInt    *row;
155b147fbf3SStefano Zampini     const PetscScalar *vals;
156b147fbf3SStefano Zampini 
1579566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
1589566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
160b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
161eb1ec7c1SStefano Zampini       PetscInt i;
16248a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163eb1ec7c1SStefano Zampini     } else {
1649566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165eb1ec7c1SStefano Zampini     }
166eb1ec7c1SStefano Zampini #else
1679566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168eb1ec7c1SStefano Zampini #endif
1699566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170b147fbf3SStefano Zampini   }
1719566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1729566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1739566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
174b147fbf3SStefano Zampini 
175b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1769566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
177b147fbf3SStefano Zampini   } else {
178b147fbf3SStefano Zampini     *newmat = B;
179b147fbf3SStefano Zampini   }
1803ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
181b147fbf3SStefano Zampini }
182b147fbf3SStefano Zampini 
183d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184d71ae5a4SJacob Faibussowitsch {
185f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
186a30f8f8cSSatish Balay 
187a30f8f8cSSatish Balay   PetscFunctionBegin;
1889566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1899566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
1903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
191a30f8f8cSSatish Balay }
192a30f8f8cSSatish Balay 
193d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194d71ae5a4SJacob Faibussowitsch {
195f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
196a30f8f8cSSatish Balay 
197a30f8f8cSSatish Balay   PetscFunctionBegin;
1989566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1999566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
2003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
201a30f8f8cSSatish Balay }
202a30f8f8cSSatish Balay 
203d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204*a8f51744SPierre Jolivet   do { \
205a30f8f8cSSatish Balay     brow = row / bs; \
2069371c9d4SSatish Balay     rp   = aj + ai[brow]; \
2079371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
2089371c9d4SSatish Balay     rmax = aimax[brow]; \
2099371c9d4SSatish Balay     nrow = ailen[brow]; \
210a30f8f8cSSatish Balay     bcol = col / bs; \
2119371c9d4SSatish Balay     ridx = row % bs; \
2129371c9d4SSatish Balay     cidx = col % bs; \
2139371c9d4SSatish Balay     low  = 0; \
2149371c9d4SSatish Balay     high = nrow; \
215a30f8f8cSSatish Balay     while (high - low > 3) { \
216a30f8f8cSSatish Balay       t = (low + high) / 2; \
217a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
218a30f8f8cSSatish Balay       else low = t; \
219a30f8f8cSSatish Balay     } \
220a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
221a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
222a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
223a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
224a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
225a30f8f8cSSatish Balay         else *bap = value; \
226a30f8f8cSSatish Balay         goto a_noinsert; \
227a30f8f8cSSatish Balay       } \
228a30f8f8cSSatish Balay     } \
229a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
2305f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232a30f8f8cSSatish Balay     N = nrow++ - 1; \
233a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2349566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2359566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2369566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
238a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
239e56f5c9eSBarry Smith     A->nonzerostate++; \
240a30f8f8cSSatish Balay   a_noinsert:; \
241a30f8f8cSSatish Balay     ailen[brow] = nrow; \
242*a8f51744SPierre Jolivet   } while (0)
243e5e170daSBarry Smith 
244d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245*a8f51744SPierre Jolivet   do { \
246a30f8f8cSSatish Balay     brow = row / bs; \
2479371c9d4SSatish Balay     rp   = bj + bi[brow]; \
2489371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
2499371c9d4SSatish Balay     rmax = bimax[brow]; \
2509371c9d4SSatish Balay     nrow = bilen[brow]; \
251a30f8f8cSSatish Balay     bcol = col / bs; \
2529371c9d4SSatish Balay     ridx = row % bs; \
2539371c9d4SSatish Balay     cidx = col % bs; \
2549371c9d4SSatish Balay     low  = 0; \
2559371c9d4SSatish Balay     high = nrow; \
256a30f8f8cSSatish Balay     while (high - low > 3) { \
257a30f8f8cSSatish Balay       t = (low + high) / 2; \
258a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
259a30f8f8cSSatish Balay       else low = t; \
260a30f8f8cSSatish Balay     } \
261a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
262a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
263a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
264a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
265a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
266a30f8f8cSSatish Balay         else *bap = value; \
267a30f8f8cSSatish Balay         goto b_noinsert; \
268a30f8f8cSSatish Balay       } \
269a30f8f8cSSatish Balay     } \
270a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2715f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273a30f8f8cSSatish Balay     N = nrow++ - 1; \
274a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2759566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2769566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2779566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
279a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
280e56f5c9eSBarry Smith     B->nonzerostate++; \
281a30f8f8cSSatish Balay   b_noinsert:; \
282a30f8f8cSSatish Balay     bilen[brow] = nrow; \
283*a8f51744SPierre Jolivet   } while (0)
284a30f8f8cSSatish Balay 
285a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
286da81f932SPierre Jolivet    Any a(i,j) with i>j input by user is ignored or generates an error
287a30f8f8cSSatish Balay */
288d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289d71ae5a4SJacob Faibussowitsch {
290a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291a30f8f8cSSatish Balay   MatScalar     value;
292ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2931302d50aSBarry Smith   PetscInt      i, j, row, col;
294d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
295d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
297a30f8f8cSSatish Balay 
298a30f8f8cSSatish Balay   /* Some Variables required in the macro */
299a30f8f8cSSatish Balay   Mat           A     = baij->A;
300a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
3011302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
303a30f8f8cSSatish Balay 
304a30f8f8cSSatish Balay   Mat          B     = baij->B;
305a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
3061302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
308a30f8f8cSSatish Balay 
3091302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
3101302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
311a30f8f8cSSatish Balay   MatScalar *ap, *bap;
312a30f8f8cSSatish Balay 
313a30f8f8cSSatish Balay   /* for stash */
3140298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
3150298fd71SBarry Smith   MatScalar *v_loc = NULL;
316a30f8f8cSSatish Balay 
317a30f8f8cSSatish Balay   PetscFunctionBegin;
318a30f8f8cSSatish Balay   if (!baij->donotstash) {
31959ffdab8SBarry Smith     if (n > baij->n_loc) {
3209566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
3219566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
3229566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
3239566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
32426fbe8dcSKarl Rupp 
32559ffdab8SBarry Smith       baij->n_loc = n;
32659ffdab8SBarry Smith     }
32759ffdab8SBarry Smith     in_loc = baij->in_loc;
32859ffdab8SBarry Smith     v_loc  = baij->v_loc;
329a30f8f8cSSatish Balay   }
330a30f8f8cSSatish Balay 
331a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
332a30f8f8cSSatish Balay     if (im[i] < 0) continue;
3335f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
336a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
33701b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
33801b2bd88SHong Zhang           if (a->ignore_ltriangular) {
33901b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
34026fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
34101b2bd88SHong Zhang         }
342a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
3449371c9d4SSatish Balay           brow = row / bs;
3459371c9d4SSatish Balay           bcol = col / bs;
346a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
348db4deed7SKarl Rupp           else value = v[i + j * m];
349d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
3509566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
352f7d195e4SLawrence Mitchell           continue;
353f7d195e4SLawrence Mitchell         } else {
354f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
356a30f8f8cSSatish Balay           if (mat->was_assembled) {
35748a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
359eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
36071730473SSatish Balay             col = col - 1;
361a30f8f8cSSatish Balay #else
36271730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
363a30f8f8cSSatish Balay #endif
364a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3659566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
366a30f8f8cSSatish Balay               col = in[j];
367a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
368a30f8f8cSSatish Balay               B     = baij->B;
369a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3709371c9d4SSatish Balay               bimax = b->imax;
3719371c9d4SSatish Balay               bi    = b->i;
3729371c9d4SSatish Balay               bilen = b->ilen;
3739371c9d4SSatish Balay               bj    = b->j;
374a30f8f8cSSatish Balay               ba    = b->a;
37571730473SSatish Balay             } else col += in[j] % bs;
376a30f8f8cSSatish Balay           } else col = in[j];
377db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
378db4deed7SKarl Rupp           else value = v[i + j * m];
379d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3809566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381a30f8f8cSSatish Balay         }
382a30f8f8cSSatish Balay       }
383a30f8f8cSSatish Balay     } else { /* off processor entry */
3845f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385a30f8f8cSSatish Balay       if (!baij->donotstash) {
3865080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
387a30f8f8cSSatish Balay         n_loc          = 0;
388a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
389f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
391a30f8f8cSSatish Balay           if (roworiented) {
392a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
393a30f8f8cSSatish Balay           } else {
394a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
395a30f8f8cSSatish Balay           }
396a30f8f8cSSatish Balay           n_loc++;
397a30f8f8cSSatish Balay         }
3989566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399a30f8f8cSSatish Balay       }
400a30f8f8cSSatish Balay     }
401a30f8f8cSSatish Balay   }
4023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
403a30f8f8cSSatish Balay }
404a30f8f8cSSatish Balay 
405d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406d71ae5a4SJacob Faibussowitsch {
40736bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
40836bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
40936bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
41036bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
41136bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
41236bd2089SBarry Smith   const PetscScalar *value       = v;
41336bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
41436bd2089SBarry Smith 
41536bd2089SBarry Smith   PetscFunctionBegin;
41636bd2089SBarry Smith   if (col < row) {
4173ba16761SJacob Faibussowitsch     if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
41836bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
41936bd2089SBarry Smith   }
42036bd2089SBarry Smith   rp    = aj + ai[row];
42136bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
42236bd2089SBarry Smith   rmax  = imax[row];
42336bd2089SBarry Smith   nrow  = ailen[row];
42436bd2089SBarry Smith   value = v;
42536bd2089SBarry Smith   low   = 0;
42636bd2089SBarry Smith   high  = nrow;
42736bd2089SBarry Smith 
42836bd2089SBarry Smith   while (high - low > 7) {
42936bd2089SBarry Smith     t = (low + high) / 2;
43036bd2089SBarry Smith     if (rp[t] > col) high = t;
43136bd2089SBarry Smith     else low = t;
43236bd2089SBarry Smith   }
43336bd2089SBarry Smith   for (i = low; i < high; i++) {
43436bd2089SBarry Smith     if (rp[i] > col) break;
43536bd2089SBarry Smith     if (rp[i] == col) {
43636bd2089SBarry Smith       bap = ap + bs2 * i;
43736bd2089SBarry Smith       if (roworiented) {
43836bd2089SBarry Smith         if (is == ADD_VALUES) {
43936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
440ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
44136bd2089SBarry Smith           }
44236bd2089SBarry Smith         } else {
44336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
444ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
44536bd2089SBarry Smith           }
44636bd2089SBarry Smith         }
44736bd2089SBarry Smith       } else {
44836bd2089SBarry Smith         if (is == ADD_VALUES) {
44936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
450ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
45136bd2089SBarry Smith           }
45236bd2089SBarry Smith         } else {
45336bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
454ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
45536bd2089SBarry Smith           }
45636bd2089SBarry Smith         }
45736bd2089SBarry Smith       }
45836bd2089SBarry Smith       goto noinsert2;
45936bd2089SBarry Smith     }
46036bd2089SBarry Smith   }
46136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4625f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
46336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4649371c9d4SSatish Balay   N = nrow++ - 1;
4659371c9d4SSatish Balay   high++;
46636bd2089SBarry Smith   /* shift up all the later entries in this row */
4679566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4689566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
46936bd2089SBarry Smith   rp[i] = col;
47036bd2089SBarry Smith   bap   = ap + bs2 * i;
47136bd2089SBarry Smith   if (roworiented) {
47236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
473ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
47436bd2089SBarry Smith     }
47536bd2089SBarry Smith   } else {
47636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
477ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
47836bd2089SBarry Smith     }
47936bd2089SBarry Smith   }
48036bd2089SBarry Smith noinsert2:;
48136bd2089SBarry Smith   ailen[row] = nrow;
4823ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
48336bd2089SBarry Smith }
48436bd2089SBarry Smith 
48536bd2089SBarry Smith /*
48636bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
48736bd2089SBarry Smith */
488d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489d71ae5a4SJacob Faibussowitsch {
49036bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
49136bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
49236bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
49336bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
49436bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
49536bd2089SBarry Smith   const PetscScalar *value       = v;
49636bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
49736bd2089SBarry Smith 
49836bd2089SBarry Smith   PetscFunctionBegin;
49936bd2089SBarry Smith   rp    = aj + ai[row];
50036bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
50136bd2089SBarry Smith   rmax  = imax[row];
50236bd2089SBarry Smith   nrow  = ailen[row];
50336bd2089SBarry Smith   low   = 0;
50436bd2089SBarry Smith   high  = nrow;
50536bd2089SBarry Smith   value = v;
50636bd2089SBarry Smith   while (high - low > 7) {
50736bd2089SBarry Smith     t = (low + high) / 2;
50836bd2089SBarry Smith     if (rp[t] > col) high = t;
50936bd2089SBarry Smith     else low = t;
51036bd2089SBarry Smith   }
51136bd2089SBarry Smith   for (i = low; i < high; i++) {
51236bd2089SBarry Smith     if (rp[i] > col) break;
51336bd2089SBarry Smith     if (rp[i] == col) {
51436bd2089SBarry Smith       bap = ap + bs2 * i;
51536bd2089SBarry Smith       if (roworiented) {
51636bd2089SBarry Smith         if (is == ADD_VALUES) {
51736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
518ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
51936bd2089SBarry Smith           }
52036bd2089SBarry Smith         } else {
52136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
522ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
52336bd2089SBarry Smith           }
52436bd2089SBarry Smith         }
52536bd2089SBarry Smith       } else {
52636bd2089SBarry Smith         if (is == ADD_VALUES) {
52736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
528ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
52936bd2089SBarry Smith             bap += bs;
53036bd2089SBarry Smith           }
53136bd2089SBarry Smith         } else {
53236bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
533ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
53436bd2089SBarry Smith             bap += bs;
53536bd2089SBarry Smith           }
53636bd2089SBarry Smith         }
53736bd2089SBarry Smith       }
53836bd2089SBarry Smith       goto noinsert2;
53936bd2089SBarry Smith     }
54036bd2089SBarry Smith   }
54136bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
5425f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
54336bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
5449371c9d4SSatish Balay   N = nrow++ - 1;
5459371c9d4SSatish Balay   high++;
54636bd2089SBarry Smith   /* shift up all the later entries in this row */
5479566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
5489566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
54936bd2089SBarry Smith   rp[i] = col;
55036bd2089SBarry Smith   bap   = ap + bs2 * i;
55136bd2089SBarry Smith   if (roworiented) {
55236bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
553ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
55436bd2089SBarry Smith     }
55536bd2089SBarry Smith   } else {
55636bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
557ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
55836bd2089SBarry Smith     }
55936bd2089SBarry Smith   }
56036bd2089SBarry Smith noinsert2:;
56136bd2089SBarry Smith   ailen[row] = nrow;
5623ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
56336bd2089SBarry Smith }
56436bd2089SBarry Smith 
56536bd2089SBarry Smith /*
56636bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
56736bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
56836bd2089SBarry Smith */
569d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570d71ae5a4SJacob Faibussowitsch {
5710880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
572f15d580aSBarry Smith   const MatScalar *value;
573f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
574ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
576476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5780880e062SHong Zhang 
579a30f8f8cSSatish Balay   PetscFunctionBegin;
5800880e062SHong Zhang   if (!barray) {
5819566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5820880e062SHong Zhang     baij->barray = barray;
5830880e062SHong Zhang   }
5840880e062SHong Zhang 
5850880e062SHong Zhang   if (roworiented) {
5860880e062SHong Zhang     stepval = (n - 1) * bs;
5870880e062SHong Zhang   } else {
5880880e062SHong Zhang     stepval = (m - 1) * bs;
5890880e062SHong Zhang   }
5900880e062SHong Zhang   for (i = 0; i < m; i++) {
5910880e062SHong Zhang     if (im[i] < 0) continue;
5926bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5930880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5940880e062SHong Zhang       row = im[i] - rstart;
5950880e062SHong Zhang       for (j = 0; j < n; j++) {
596f3f98c53SJed Brown         if (im[i] > in[j]) {
597f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599f3f98c53SJed Brown         }
6000880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
6010880e062SHong Zhang         if ((roworiented) && (n == 1)) {
602f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
6030880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
604f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
6050880e062SHong Zhang         } else { /* Here a copy is required */
6060880e062SHong Zhang           if (roworiented) {
6070880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
6080880e062SHong Zhang           } else {
6090880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
6100880e062SHong Zhang           }
6110880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
612ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
6130880e062SHong Zhang           }
6140880e062SHong Zhang           barray -= bs2;
6150880e062SHong Zhang         }
6160880e062SHong Zhang 
6170880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
6180880e062SHong Zhang           col = in[j] - cstart;
6199566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
621f7d195e4SLawrence Mitchell           continue;
622f7d195e4SLawrence Mitchell         } else {
623f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
6240880e062SHong Zhang           if (mat->was_assembled) {
62548a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
6260880e062SHong Zhang 
6272515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
6280880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
6299371c9d4SSatish Balay             {
6309371c9d4SSatish Balay               PetscInt data;
631eec179cfSJacob Faibussowitsch               PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
63208401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6330880e062SHong Zhang             }
6340880e062SHong Zhang   #else
63508401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
6360880e062SHong Zhang   #endif
6370880e062SHong Zhang #endif
6380880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
639eec179cfSJacob Faibussowitsch             PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
6400880e062SHong Zhang             col = (col - 1) / bs;
6410880e062SHong Zhang #else
6420880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
6430880e062SHong Zhang #endif
6440880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
6459566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
6460880e062SHong Zhang               col = in[j];
6470880e062SHong Zhang             }
64826fbe8dcSKarl Rupp           } else col = in[j];
6499566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
6500880e062SHong Zhang         }
6510880e062SHong Zhang       }
6520880e062SHong Zhang     } else {
6535f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
6540880e062SHong Zhang       if (!baij->donotstash) {
6550880e062SHong Zhang         if (roworiented) {
6569566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6570880e062SHong Zhang         } else {
6589566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
6590880e062SHong Zhang         }
6600880e062SHong Zhang       }
6610880e062SHong Zhang     }
6620880e062SHong Zhang   }
6633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
664a30f8f8cSSatish Balay }
665a30f8f8cSSatish Balay 
666d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667d71ae5a4SJacob Faibussowitsch {
668f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
671a30f8f8cSSatish Balay 
672a30f8f8cSSatish Balay   PetscFunctionBegin;
673a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
67454c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
67554c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
678a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
67954c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
68054c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
682a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6839566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684a30f8f8cSSatish Balay         } else {
68548a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
687eec179cfSJacob Faibussowitsch           PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688a30f8f8cSSatish Balay           data--;
689a30f8f8cSSatish Balay #else
690a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
691a30f8f8cSSatish Balay #endif
692a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693a30f8f8cSSatish Balay           else {
694a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6959566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696a30f8f8cSSatish Balay           }
697a30f8f8cSSatish Balay         }
698a30f8f8cSSatish Balay       }
699f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700a30f8f8cSSatish Balay   }
7013ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
702a30f8f8cSSatish Balay }
703a30f8f8cSSatish Balay 
704d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705d71ae5a4SJacob Faibussowitsch {
706a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
708a30f8f8cSSatish Balay 
709a30f8f8cSSatish Balay   PetscFunctionBegin;
710a30f8f8cSSatish Balay   if (baij->size == 1) {
7119566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
712a30f8f8cSSatish Balay   } else {
713a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
7149566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
7159566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
7169371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7179371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
7189566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
7199371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
7209371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
7211c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7228f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
7239566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
7240b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
7250b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
7260b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
7270b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
728899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
7300b8dc8d2SHong Zhang       MatScalar    *v;
7310b8dc8d2SHong Zhang 
7329566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
7339566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
7340b8dc8d2SHong Zhang       /* Amat */
7359371c9d4SSatish Balay       v  = amat->a;
7369371c9d4SSatish Balay       jj = amat->j;
7370b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7380b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7390b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
7400b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7419371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
7429371c9d4SSatish Balay           jj++;
7430b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7440b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7459371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7469371c9d4SSatish Balay               v++;
7470b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7480b8dc8d2SHong Zhang               /* non-diagonal block */
7490b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
7500b8dc8d2SHong Zhang             }
7510b8dc8d2SHong Zhang           }
7520b8dc8d2SHong Zhang         }
7539566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7540b8dc8d2SHong Zhang       }
7550b8dc8d2SHong Zhang       /* Bmat */
7569371c9d4SSatish Balay       v  = bmat->a;
7579371c9d4SSatish Balay       jj = bmat->j;
7580b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
7590b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
7600b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
7610b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7629371c9d4SSatish Balay           gcol = bs * garray[*jj];
7639371c9d4SSatish Balay           jj++;
7640b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7650b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7669371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7679371c9d4SSatish Balay               v++;
7680b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7690b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7700b8dc8d2SHong Zhang             }
7710b8dc8d2SHong Zhang           }
7720b8dc8d2SHong Zhang         }
7739566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7740b8dc8d2SHong Zhang       }
7751c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7760b8dc8d2SHong Zhang       *norm = 0.0;
777d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7780b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7790b8dc8d2SHong Zhang       }
7809566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
781f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782a30f8f8cSSatish Balay   }
7833ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
784a30f8f8cSSatish Balay }
785a30f8f8cSSatish Balay 
786d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787d71ae5a4SJacob Faibussowitsch {
788a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7891302d50aSBarry Smith   PetscInt      nstash, reallocs;
790a30f8f8cSSatish Balay 
791a30f8f8cSSatish Balay   PetscFunctionBegin;
7923ba16761SJacob Faibussowitsch   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
793a30f8f8cSSatish Balay 
7949566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7959566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7969566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7979566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7989566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7999566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
8003ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
801a30f8f8cSSatish Balay }
802a30f8f8cSSatish Balay 
803d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804d71ae5a4SJacob Faibussowitsch {
805a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
80713f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
808e44c0bd4SBarry Smith   PetscInt     *row, *col;
809ace3abfcSBarry Smith   PetscBool     other_disassembled;
81013f74950SBarry Smith   PetscMPIInt   n;
811ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
812a30f8f8cSSatish Balay   MatScalar    *val;
813a30f8f8cSSatish Balay 
81491c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815a30f8f8cSSatish Balay   PetscFunctionBegin;
8164cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
817a30f8f8cSSatish Balay     while (1) {
8189566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819a30f8f8cSSatish Balay       if (!flg) break;
820a30f8f8cSSatish Balay 
821a30f8f8cSSatish Balay       for (i = 0; i < n;) {
822a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
82326fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
82426fbe8dcSKarl Rupp           if (row[j] != rstart) break;
82526fbe8dcSKarl Rupp         }
826a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
827a30f8f8cSSatish Balay         else ncols = n - i;
828a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
8299566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830a30f8f8cSSatish Balay         i = j;
831a30f8f8cSSatish Balay       }
832a30f8f8cSSatish Balay     }
8339566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
834a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
835a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
836a30f8f8cSSatish Balay        restore the original flags */
837a30f8f8cSSatish Balay     r1 = baij->roworiented;
838a30f8f8cSSatish Balay     r2 = a->roworiented;
83991c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
84026fbe8dcSKarl Rupp 
841a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
842a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
84326fbe8dcSKarl Rupp 
84491c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
845a30f8f8cSSatish Balay     while (1) {
8469566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847a30f8f8cSSatish Balay       if (!flg) break;
848a30f8f8cSSatish Balay 
849a30f8f8cSSatish Balay       for (i = 0; i < n;) {
850a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
85126fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
85226fbe8dcSKarl Rupp           if (row[j] != rstart) break;
85326fbe8dcSKarl Rupp         }
854a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
855a30f8f8cSSatish Balay         else ncols = n - i;
8569566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857a30f8f8cSSatish Balay         i = j;
858a30f8f8cSSatish Balay       }
859a30f8f8cSSatish Balay     }
8609566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
86126fbe8dcSKarl Rupp 
862a30f8f8cSSatish Balay     baij->roworiented = r1;
863a30f8f8cSSatish Balay     a->roworiented    = r2;
86426fbe8dcSKarl Rupp 
86591c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
8689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
870a30f8f8cSSatish Balay 
871a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8726aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
873a30f8f8cSSatish Balay   /*
874a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
875a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
876a30f8f8cSSatish Balay   */
877a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8785f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
87948a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880a30f8f8cSSatish Balay   }
881a30f8f8cSSatish Balay 
8829371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8839566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8849566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
885a30f8f8cSSatish Balay 
8869566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
88726fbe8dcSKarl Rupp 
888f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8894f9cfa9eSBarry Smith 
8904f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8914f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8931c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894e56f5c9eSBarry Smith   }
8953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
896a30f8f8cSSatish Balay }
897a30f8f8cSSatish Balay 
898dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8999804daf3SBarry Smith #include <petscdraw.h>
900d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901d71ae5a4SJacob Faibussowitsch {
902a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
903d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
9047da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
905ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
906b0a32e0cSBarry Smith   PetscViewer       sviewer;
907f3ef73ceSBarry Smith   PetscViewerFormat format;
908a30f8f8cSSatish Balay 
909a30f8f8cSSatish Balay   PetscFunctionBegin;
9109566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9119566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
91232077d6dSBarry Smith   if (iascii) {
9139566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
914456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915a30f8f8cSSatish Balay       MatInfo info;
9169566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
9179566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
9189566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9199371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
9209371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
9219566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
9229566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9239566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
9249566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9259566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9269566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
9279566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
9289566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
9293ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
930fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
9319566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
9323ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
933c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
9343ba16761SJacob Faibussowitsch       PetscFunctionReturn(PETSC_SUCCESS);
935a30f8f8cSSatish Balay     }
936a30f8f8cSSatish Balay   }
937a30f8f8cSSatish Balay 
938a30f8f8cSSatish Balay   if (isdraw) {
939b0a32e0cSBarry Smith     PetscDraw draw;
940ace3abfcSBarry Smith     PetscBool isnull;
9419566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
9429566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
9433ba16761SJacob Faibussowitsch     if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944a30f8f8cSSatish Balay   }
945a30f8f8cSSatish Balay 
9467da1fb6eSBarry Smith   {
947a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
948a30f8f8cSSatish Balay     Mat           A;
94965d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
95065d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
951d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952a30f8f8cSSatish Balay     MatScalar    *a;
9533e219373SBarry Smith     const char   *matname;
954a30f8f8cSSatish Balay 
955f204ca49SKris Buschelman     /* Should this be the same type as mat? */
9569566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957dd400576SPatrick Sanan     if (rank == 0) {
9589566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
959a30f8f8cSSatish Balay     } else {
9609566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
961a30f8f8cSSatish Balay     }
9629566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9639566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9649566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
965a30f8f8cSSatish Balay 
966a30f8f8cSSatish Balay     /* copy over the A part */
96765d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9689371c9d4SSatish Balay     ai   = Aloc->i;
9699371c9d4SSatish Balay     aj   = Aloc->j;
9709371c9d4SSatish Balay     a    = Aloc->a;
9719566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
972a30f8f8cSSatish Balay 
973a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
974e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
97526fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
977e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
978a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9799566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
98026fbe8dcSKarl Rupp           col++;
98126fbe8dcSKarl Rupp           a += bs;
982a30f8f8cSSatish Balay         }
983a30f8f8cSSatish Balay       }
984a30f8f8cSSatish Balay     }
985a30f8f8cSSatish Balay     /* copy over the B part */
98665d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9879371c9d4SSatish Balay     ai   = Bloc->i;
9889371c9d4SSatish Balay     aj   = Bloc->j;
9899371c9d4SSatish Balay     a    = Bloc->a;
990a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
991e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
99226fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
994a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
995a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9969566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
99726fbe8dcSKarl Rupp           col++;
99826fbe8dcSKarl Rupp           a += bs;
999a30f8f8cSSatish Balay         }
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
10029566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
10039566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
10049566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005a30f8f8cSSatish Balay     /*
1006a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1007b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
1008a30f8f8cSSatish Balay     */
10099566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
101023a3927dSBarry Smith     if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011dd400576SPatrick Sanan     if (rank == 0) {
101223a3927dSBarry Smith       if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
10139566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014a30f8f8cSSatish Balay     }
10159566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
10169566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
10179566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
1018a30f8f8cSSatish Balay   }
10193ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1020a30f8f8cSSatish Balay }
1021a30f8f8cSSatish Balay 
1022618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1024d1654148SHong Zhang 
1025d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026d71ae5a4SJacob Faibussowitsch {
1027ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
1028a30f8f8cSSatish Balay 
1029a30f8f8cSSatish Balay   PetscFunctionBegin;
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
10319566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
10329566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
10339566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034d1654148SHong Zhang   if (iascii || isdraw || issocket) {
10359566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
10361baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
10373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1038a30f8f8cSSatish Balay }
1039a30f8f8cSSatish Balay 
1040d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041d71ae5a4SJacob Faibussowitsch {
1042547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1043eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10446de40e93SBarry Smith   PetscScalar       *from;
10456de40e93SBarry Smith   const PetscScalar *x;
1046547795f9SHong Zhang 
1047547795f9SHong Zhang   PetscFunctionBegin;
1048547795f9SHong Zhang   /* diagonal part */
10499566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050629a200eSBarry Smith   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1051629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1052629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1053547795f9SHong Zhang 
1054547795f9SHong Zhang   /* subdiagonal part */
10555f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10569566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1057547795f9SHong Zhang 
1058547795f9SHong Zhang   /* copy x into the vec slvec0 */
10599566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10609566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1061547795f9SHong Zhang 
10629566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10639566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10649566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1065547795f9SHong Zhang 
10669566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10679566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1068547795f9SHong Zhang   /* supperdiagonal part */
10699566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
10703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1071547795f9SHong Zhang }
1072547795f9SHong Zhang 
1073d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1074d71ae5a4SJacob Faibussowitsch {
1075a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1076eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1077d9ca1df4SBarry Smith   PetscScalar       *from;
1078d9ca1df4SBarry Smith   const PetscScalar *x;
1079a9d4b620SHong Zhang 
1080a9d4b620SHong Zhang   PetscFunctionBegin;
1081a9d4b620SHong Zhang   /* diagonal part */
10829566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1083629a200eSBarry Smith   /* since a->slvec1b shares memory (dangerously) with a->slec1 changes to a->slec1 will affect it */
1084629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1085629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1086a9d4b620SHong Zhang 
1087a9d4b620SHong Zhang   /* subdiagonal part */
10889566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1089fc165ae2SBarry Smith 
1090a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10919566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10929566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1093a9d4b620SHong Zhang 
10949566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10959566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10969566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1097fc165ae2SBarry Smith 
10989566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10999566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1100a9d4b620SHong Zhang   /* supperdiagonal part */
11019566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
11023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1103a9d4b620SHong Zhang }
1104a9d4b620SHong Zhang 
1105d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1106d71ae5a4SJacob Faibussowitsch {
1107eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1108eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1109629a200eSBarry Smith   PetscScalar       *from;
1110eb1ec7c1SStefano Zampini   const PetscScalar *x;
1111eb1ec7c1SStefano Zampini 
1112eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1113eb1ec7c1SStefano Zampini   /* diagonal part */
11149566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1115629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1116629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1117eb1ec7c1SStefano Zampini 
1118eb1ec7c1SStefano Zampini   /* subdiagonal part */
11195f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11209566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1121eb1ec7c1SStefano Zampini 
1122eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11249566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11269566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1127eb1ec7c1SStefano Zampini 
11289566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11299566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11309566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1131eb1ec7c1SStefano Zampini 
1132eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11339566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11343ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1135eb1ec7c1SStefano Zampini }
1136eb1ec7c1SStefano Zampini 
1137d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1138d71ae5a4SJacob Faibussowitsch {
1139de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1140d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1141629a200eSBarry Smith   PetscScalar       *from;
1142d9ca1df4SBarry Smith   const PetscScalar *x;
1143a9d4b620SHong Zhang 
1144a9d4b620SHong Zhang   PetscFunctionBegin;
1145a9d4b620SHong Zhang   /* diagonal part */
11469566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1147629a200eSBarry Smith   PetscCall(PetscObjectStateIncrease((PetscObject)a->slvec1b));
1148629a200eSBarry Smith   PetscCall(VecZeroEntries(a->slvec1b));
1149a9d4b620SHong Zhang 
1150a9d4b620SHong Zhang   /* subdiagonal part */
11519566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1152a9d4b620SHong Zhang 
1153a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11549566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11559566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11569566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11579566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1158a9d4b620SHong Zhang 
11599566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11609566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11619566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1162a9d4b620SHong Zhang 
1163a9d4b620SHong Zhang   /* supperdiagonal part */
11649566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
11653ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1166a9d4b620SHong Zhang }
1167a9d4b620SHong Zhang 
1168a30f8f8cSSatish Balay /*
1169a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1170a30f8f8cSSatish Balay    diagonal block
1171a30f8f8cSSatish Balay */
1172d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1173d71ae5a4SJacob Faibussowitsch {
1174a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1175a30f8f8cSSatish Balay 
1176a30f8f8cSSatish Balay   PetscFunctionBegin;
117708401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11789566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
11793ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1180a30f8f8cSSatish Balay }
1181a30f8f8cSSatish Balay 
1182d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1183d71ae5a4SJacob Faibussowitsch {
1184a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1185a30f8f8cSSatish Balay 
1186a30f8f8cSSatish Balay   PetscFunctionBegin;
11879566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11889566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
11893ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1190a30f8f8cSSatish Balay }
1191a30f8f8cSSatish Balay 
1192d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1193d71ae5a4SJacob Faibussowitsch {
1194d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1195d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1196d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1197d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1198899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1199d0d4cfc2SHong Zhang 
1200a30f8f8cSSatish Balay   PetscFunctionBegin;
12015f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1202d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1203d0d4cfc2SHong Zhang 
1204d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1205d0d4cfc2SHong Zhang     /*
1206d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1207d0d4cfc2SHong Zhang     */
1208d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1209d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1210d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1211d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1212d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
121326fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1214d0d4cfc2SHong Zhang     }
12159566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1216d0d4cfc2SHong Zhang   }
1217d0d4cfc2SHong Zhang 
12185f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1219d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1220d0d4cfc2SHong Zhang 
12219371c9d4SSatish Balay   pvA = &vworkA;
12229371c9d4SSatish Balay   pcA = &cworkA;
12239371c9d4SSatish Balay   pvB = &vworkB;
12249371c9d4SSatish Balay   pcB = &cworkB;
12259371c9d4SSatish Balay   if (!v) {
12269371c9d4SSatish Balay     pvA = NULL;
12279371c9d4SSatish Balay     pvB = NULL;
12289371c9d4SSatish Balay   }
12299371c9d4SSatish Balay   if (!idx) {
12309371c9d4SSatish Balay     pcA = NULL;
12319371c9d4SSatish Balay     if (!v) pcB = NULL;
12329371c9d4SSatish Balay   }
12339566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12349566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1235d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1236d0d4cfc2SHong Zhang 
1237d0d4cfc2SHong Zhang   cmap = mat->garray;
1238d0d4cfc2SHong Zhang   if (v || idx) {
1239d0d4cfc2SHong Zhang     if (nztot) {
1240d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1241d0d4cfc2SHong Zhang       PetscInt imark = -1;
1242d0d4cfc2SHong Zhang       if (v) {
1243d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1244d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1245d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1246d0d4cfc2SHong Zhang           else break;
1247d0d4cfc2SHong Zhang         }
1248d0d4cfc2SHong Zhang         imark = i;
1249d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1250d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1251d0d4cfc2SHong Zhang       }
1252d0d4cfc2SHong Zhang       if (idx) {
1253d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1254d0d4cfc2SHong Zhang         if (imark > -1) {
1255ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1256d0d4cfc2SHong Zhang         } else {
1257d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
125826fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259d0d4cfc2SHong Zhang             else break;
1260d0d4cfc2SHong Zhang           }
1261d0d4cfc2SHong Zhang           imark = i;
1262d0d4cfc2SHong Zhang         }
1263d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1264d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1265d0d4cfc2SHong Zhang       }
1266d0d4cfc2SHong Zhang     } else {
1267f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1268f4259b30SLisandro Dalcin       if (v) *v = NULL;
1269d0d4cfc2SHong Zhang     }
1270d0d4cfc2SHong Zhang   }
1271d0d4cfc2SHong Zhang   *nz = nztot;
12729566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12739566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
12743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1275a30f8f8cSSatish Balay }
1276a30f8f8cSSatish Balay 
1277d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1278d71ae5a4SJacob Faibussowitsch {
1279a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1280a30f8f8cSSatish Balay 
1281a30f8f8cSSatish Balay   PetscFunctionBegin;
12825f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1283a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
12843ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1285a30f8f8cSSatish Balay }
1286a30f8f8cSSatish Balay 
1287d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1288d71ae5a4SJacob Faibussowitsch {
1289d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1290d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1291d0d4cfc2SHong Zhang 
1292d0d4cfc2SHong Zhang   PetscFunctionBegin;
1293d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
12943ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1295d0d4cfc2SHong Zhang }
1296d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1297d71ae5a4SJacob Faibussowitsch {
1298d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1299d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1300d0d4cfc2SHong Zhang 
1301d0d4cfc2SHong Zhang   PetscFunctionBegin;
1302d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
13033ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1304d0d4cfc2SHong Zhang }
1305d0d4cfc2SHong Zhang 
1306d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1307d71ae5a4SJacob Faibussowitsch {
13085f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
13095f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
13102726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
13112726fb6dSPierre Jolivet 
13129566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13139566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13145f80ce2aSJacob Faibussowitsch   }
13153ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13162726fb6dSPierre Jolivet }
13172726fb6dSPierre Jolivet 
1318d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1319d71ae5a4SJacob Faibussowitsch {
132099cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
132199cafbc1SBarry Smith 
132299cafbc1SBarry Smith   PetscFunctionBegin;
13239566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13249566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
13253ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
132699cafbc1SBarry Smith }
132799cafbc1SBarry Smith 
1328d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1329d71ae5a4SJacob Faibussowitsch {
133099cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
133199cafbc1SBarry Smith 
133299cafbc1SBarry Smith   PetscFunctionBegin;
13339566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13349566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
13353ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
133699cafbc1SBarry Smith }
133799cafbc1SBarry Smith 
13387dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
133936032a97SHong Zhang    Input: isrow       - distributed(parallel),
134036032a97SHong Zhang           iscol_local - locally owned (seq)
134136032a97SHong Zhang */
1342d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1343d71ae5a4SJacob Faibussowitsch {
13448f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13458f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
134636032a97SHong Zhang 
134736032a97SHong Zhang   PetscFunctionBegin;
13489566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13499566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13501098a8e8SHong Zhang   if (sz1 > sz2) {
13511098a8e8SHong Zhang     *flg = PETSC_FALSE;
13523ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
13531098a8e8SHong Zhang   }
13548f46ffcaSHong Zhang 
13559566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13569566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13578f46ffcaSHong Zhang 
13589566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13599566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13609566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13619566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13629566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13639566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13648f46ffcaSHong Zhang 
13658f46ffcaSHong Zhang   nmatch = 0;
13668f46ffcaSHong Zhang   k      = 0;
13678f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13688f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13698f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13709371c9d4SSatish Balay         k = j;
13719371c9d4SSatish Balay         nmatch++;
13728f46ffcaSHong Zhang         break;
13738f46ffcaSHong Zhang       }
13748f46ffcaSHong Zhang     }
13758f46ffcaSHong Zhang   }
13769566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13779566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13789566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13799566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13801098a8e8SHong Zhang   if (nmatch < sz1) {
13811098a8e8SHong Zhang     *flg = PETSC_FALSE;
13821098a8e8SHong Zhang   } else {
13831098a8e8SHong Zhang     *flg = PETSC_TRUE;
13841098a8e8SHong Zhang   }
13853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
13868f46ffcaSHong Zhang }
138736032a97SHong Zhang 
1388d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1389d71ae5a4SJacob Faibussowitsch {
139036032a97SHong Zhang   IS        iscol_local;
139136032a97SHong Zhang   PetscInt  csize;
139236032a97SHong Zhang   PetscBool isequal;
139336032a97SHong Zhang 
139436032a97SHong Zhang   PetscFunctionBegin;
13959566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
139636032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13979566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13985f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
139936032a97SHong Zhang   } else {
1400068661f9SToby Isaac     PetscBool issorted;
1401068661f9SToby Isaac 
14029566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
14039566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
14049566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
14055f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
14068f46ffcaSHong Zhang   }
14078f46ffcaSHong Zhang 
14087dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
14099566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
14108f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
14119566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
14129566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14138f46ffcaSHong Zhang   }
14143ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
14158f46ffcaSHong Zhang }
14168f46ffcaSHong Zhang 
1417d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1418d71ae5a4SJacob Faibussowitsch {
1419a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1420a30f8f8cSSatish Balay 
1421a30f8f8cSSatish Balay   PetscFunctionBegin;
14229566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14239566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
14243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1425a30f8f8cSSatish Balay }
1426a30f8f8cSSatish Balay 
1427d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1428d71ae5a4SJacob Faibussowitsch {
1429a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1430a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14313966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1432a30f8f8cSSatish Balay 
1433a30f8f8cSSatish Balay   PetscFunctionBegin;
1434d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
143526fbe8dcSKarl Rupp 
14369566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
143726fbe8dcSKarl Rupp 
14389371c9d4SSatish Balay   isend[0] = info->nz_used;
14399371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14409371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14419371c9d4SSatish Balay   isend[3] = info->memory;
14429371c9d4SSatish Balay   isend[4] = info->mallocs;
144326fbe8dcSKarl Rupp 
14449566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
144526fbe8dcSKarl Rupp 
14469371c9d4SSatish Balay   isend[0] += info->nz_used;
14479371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14489371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14499371c9d4SSatish Balay   isend[3] += info->memory;
14509371c9d4SSatish Balay   isend[4] += info->mallocs;
1451a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1452a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1453a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1454a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1455a30f8f8cSSatish Balay     info->memory       = isend[3];
1456a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1457a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14581c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
145926fbe8dcSKarl Rupp 
1460a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1461a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1462a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1463a30f8f8cSSatish Balay     info->memory       = irecv[3];
1464a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1465a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14661c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
146726fbe8dcSKarl Rupp 
1468a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1469a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1470a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1471a30f8f8cSSatish Balay     info->memory       = irecv[3];
1472a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
147398921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1474a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1475a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1476a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
14773ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1478a30f8f8cSSatish Balay }
1479a30f8f8cSSatish Balay 
1480d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1481d71ae5a4SJacob Faibussowitsch {
1482a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1483d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1484a30f8f8cSSatish Balay 
1485a30f8f8cSSatish Balay   PetscFunctionBegin;
1486e98b92d7SKris Buschelman   switch (op) {
1487512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1488e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
148928b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1490a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1491c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1492e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
149343674050SBarry Smith     MatCheckPreallocated(A, 1);
14949566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14959566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1496e98b92d7SKris Buschelman     break;
1497e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
149843674050SBarry Smith     MatCheckPreallocated(A, 1);
14994e0d8c25SBarry Smith     a->roworiented = flg;
150026fbe8dcSKarl Rupp 
15019566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15029566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1503e98b92d7SKris Buschelman     break;
15048c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1505d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1506d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1507d71ae5a4SJacob Faibussowitsch     break;
1508d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1510d71ae5a4SJacob Faibussowitsch     break;
1511d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1512d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1513d71ae5a4SJacob Faibussowitsch     break;
1514d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1515d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1516d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15170f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1518eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1519547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1520eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1521eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1522eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1523b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1524eb1ec7c1SStefano Zampini     }
15250f2140c7SStefano Zampini #endif
1526eeffb40dSHong Zhang     break;
1527ffa07934SHong Zhang   case MAT_SPD:
1528d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1529d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1530d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1531eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1532eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1533eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1534eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1535eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1536eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1537eb1ec7c1SStefano Zampini     }
1538eb1ec7c1SStefano Zampini #endif
1539eeffb40dSHong Zhang     break;
154077e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
154143674050SBarry Smith     MatCheckPreallocated(A, 1);
15429566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1543eeffb40dSHong Zhang     break;
15449a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1545b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15465f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15479566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
154877e54ba9SKris Buschelman     break;
1549d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1550d71ae5a4SJacob Faibussowitsch     break;
1551d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1552d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1553d71ae5a4SJacob Faibussowitsch     break;
1554d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1555d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1556d71ae5a4SJacob Faibussowitsch     break;
1557d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1558d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1559d71ae5a4SJacob Faibussowitsch     break;
1560d71ae5a4SJacob Faibussowitsch   default:
1561d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1562a30f8f8cSSatish Balay   }
15633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1564a30f8f8cSSatish Balay }
1565a30f8f8cSSatish Balay 
1566d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1567d71ae5a4SJacob Faibussowitsch {
1568a30f8f8cSSatish Balay   PetscFunctionBegin;
15697fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1570cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15719566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1572cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15739566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1574fc4dec0aSBarry Smith   }
15753ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1576a30f8f8cSSatish Balay }
1577a30f8f8cSSatish Balay 
1578d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1579d71ae5a4SJacob Faibussowitsch {
1580a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1581a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15825e90f9d9SHong Zhang   PetscInt      nv, m, n;
1583ace3abfcSBarry Smith   PetscBool     flg;
1584a30f8f8cSSatish Balay 
1585a30f8f8cSSatish Balay   PetscFunctionBegin;
1586a30f8f8cSSatish Balay   if (ll != rr) {
15879566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15885f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1589a30f8f8cSSatish Balay   }
15903ba16761SJacob Faibussowitsch   if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1591b3bf805bSHong Zhang 
15929566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15935f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1594b3bf805bSHong Zhang 
15959566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15965f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15975e90f9d9SHong Zhang 
15989566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15995e90f9d9SHong Zhang 
16005e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1601dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
16025e90f9d9SHong Zhang 
16035e90f9d9SHong Zhang   /* scale the diagonal part */
1604dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1605a30f8f8cSSatish Balay 
16065e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
16079566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1608dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
16093ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1610a30f8f8cSSatish Balay }
1611a30f8f8cSSatish Balay 
1612d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1613d71ae5a4SJacob Faibussowitsch {
1614f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1615a30f8f8cSSatish Balay 
1616a30f8f8cSSatish Balay   PetscFunctionBegin;
16179566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
16183ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1619a30f8f8cSSatish Balay }
1620a30f8f8cSSatish Balay 
16216849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1622a30f8f8cSSatish Balay 
1623d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1624d71ae5a4SJacob Faibussowitsch {
1625a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1626a30f8f8cSSatish Balay   Mat           a, b, c, d;
1627ace3abfcSBarry Smith   PetscBool     flg;
1628a30f8f8cSSatish Balay 
1629a30f8f8cSSatish Balay   PetscFunctionBegin;
16309371c9d4SSatish Balay   a = matA->A;
16319371c9d4SSatish Balay   b = matA->B;
16329371c9d4SSatish Balay   c = matB->A;
16339371c9d4SSatish Balay   d = matB->B;
1634a30f8f8cSSatish Balay 
16359566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
163648a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16371c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
16383ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1639a30f8f8cSSatish Balay }
1640a30f8f8cSSatish Balay 
1641d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1642d71ae5a4SJacob Faibussowitsch {
16434c7a3774SStefano Zampini   PetscBool isbaij;
16443c896bc6SHong Zhang 
16453c896bc6SHong Zhang   PetscFunctionBegin;
16469566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16475f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16483c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16493c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16509566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16519566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16529566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16533c896bc6SHong Zhang   } else {
16544c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16554c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16564c7a3774SStefano Zampini 
16579566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16589566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16593c896bc6SHong Zhang   }
16609566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16613ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
16623c896bc6SHong Zhang }
16633c896bc6SHong Zhang 
1664d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1665d71ae5a4SJacob Faibussowitsch {
16664fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16674fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16684fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16694fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16704fe895cdSHong Zhang 
16714fe895cdSHong Zhang   PetscFunctionBegin;
16724fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16734fe895cdSHong Zhang     PetscScalar alpha = a;
16744fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16754fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16769566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1677792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16784fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16794fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16809566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1681792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16829566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1683ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16849566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16859566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16869566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16874fe895cdSHong Zhang   } else {
16884de5dceeSHong Zhang     Mat       B;
16894de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16905f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16919566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16929566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16939566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16949566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16959566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16969566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16979566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16989566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16999566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
17009566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
17019566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
17029566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
17039566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
17049566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
17059566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17069566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17089566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17094fe895cdSHong Zhang   }
17103ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17114fe895cdSHong Zhang }
17124fe895cdSHong Zhang 
1713d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1714d71ae5a4SJacob Faibussowitsch {
17151302d50aSBarry Smith   PetscInt  i;
1716afebec48SHong Zhang   PetscBool flg;
1717a5e6ed63SBarry Smith 
17186849ba73SBarry Smith   PetscFunctionBegin;
17199566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1720a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17219566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
172248a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17234dcd73b1SHong Zhang   }
17243ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1725a5e6ed63SBarry Smith }
1726a5e6ed63SBarry Smith 
1727d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1728d71ae5a4SJacob Faibussowitsch {
17297d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17306f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17317d68702bSBarry Smith 
17327d68702bSBarry Smith   PetscFunctionBegin;
17336f33a894SBarry Smith   if (!Y->preallocated) {
17349566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17356f33a894SBarry Smith   } else if (!aij->nz) {
1736b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17379566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1738b83222d8SBarry Smith     aij->nonew = nonew;
17397d68702bSBarry Smith   }
17409566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17413ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17427d68702bSBarry Smith }
17437d68702bSBarry Smith 
1744d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1745d71ae5a4SJacob Faibussowitsch {
17463b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17473b49f96aSBarry Smith 
17483b49f96aSBarry Smith   PetscFunctionBegin;
17495f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17509566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17513b49f96aSBarry Smith   if (d) {
17523b49f96aSBarry Smith     PetscInt rstart;
17539566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17543b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17553b49f96aSBarry Smith   }
17563ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
17573b49f96aSBarry Smith }
17583b49f96aSBarry Smith 
1759d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1760d71ae5a4SJacob Faibussowitsch {
1761a5b7ff6bSBarry Smith   PetscFunctionBegin;
1762a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
17633ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1764a5b7ff6bSBarry Smith }
17653b49f96aSBarry Smith 
17663964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1767a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1768a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1769a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
177097304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1771431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1772431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1773f4259b30SLisandro Dalcin                                        NULL,
1774f4259b30SLisandro Dalcin                                        NULL,
1775f4259b30SLisandro Dalcin                                        NULL,
1776f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1777f4259b30SLisandro Dalcin                                        NULL,
1778f4259b30SLisandro Dalcin                                        NULL,
177941f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1780a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
178197304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1782a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1783a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1784a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1785a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
178697304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1787a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1788a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1789a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1790f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1791f4259b30SLisandro Dalcin                                        NULL,
1792f4259b30SLisandro Dalcin                                        NULL,
1793f4259b30SLisandro Dalcin                                        NULL,
1794f4259b30SLisandro Dalcin                                        NULL,
179526cec326SBarry Smith                                        /* 29*/ MatSetUp_MPI_Hash,
1796f4259b30SLisandro Dalcin                                        NULL,
1797f4259b30SLisandro Dalcin                                        NULL,
1798a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1799f4259b30SLisandro Dalcin                                        NULL,
1800d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1801f4259b30SLisandro Dalcin                                        NULL,
1802f4259b30SLisandro Dalcin                                        NULL,
1803f4259b30SLisandro Dalcin                                        NULL,
1804f4259b30SLisandro Dalcin                                        NULL,
1805d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18067dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1807d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1808a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18093c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1810f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1811a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18127d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        NULL,
1815f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1816f4259b30SLisandro Dalcin                                        NULL,
1817f4259b30SLisandro Dalcin                                        NULL,
1818f4259b30SLisandro Dalcin                                        NULL,
1819f4259b30SLisandro Dalcin                                        NULL,
1820f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1821f4259b30SLisandro Dalcin                                        NULL,
1822a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1823f4259b30SLisandro Dalcin                                        NULL,
1824a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18257dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1826f4259b30SLisandro Dalcin                                        NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829f4259b30SLisandro Dalcin                                        NULL,
1830f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1831f4259b30SLisandro Dalcin                                        NULL,
1832f4259b30SLisandro Dalcin                                        NULL,
1833f4259b30SLisandro Dalcin                                        NULL,
1834f4259b30SLisandro Dalcin                                        NULL,
1835d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1836f4259b30SLisandro Dalcin                                        NULL,
183728d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        NULL,
1840f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1841f4259b30SLisandro Dalcin                                        NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
1843f4259b30SLisandro Dalcin                                        NULL,
1844f4259b30SLisandro Dalcin                                        NULL,
1845f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1846f4259b30SLisandro Dalcin                                        NULL,
1847f4259b30SLisandro Dalcin                                        NULL,
1848f4259b30SLisandro Dalcin                                        NULL,
18495bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1850f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1851f4259b30SLisandro Dalcin                                        NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        NULL,
1855f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1856f4259b30SLisandro Dalcin                                        NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
1859f4259b30SLisandro Dalcin                                        NULL,
1860f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1861f4259b30SLisandro Dalcin                                        NULL,
1862f4259b30SLisandro Dalcin                                        NULL,
1863f4259b30SLisandro Dalcin                                        NULL,
1864f4259b30SLisandro Dalcin                                        NULL,
1865f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1866f4259b30SLisandro Dalcin                                        NULL,
1867f4259b30SLisandro Dalcin                                        NULL,
18682726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1869f4259b30SLisandro Dalcin                                        NULL,
1870f4259b30SLisandro Dalcin                                        /*104*/ NULL,
187199cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1872d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1873d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
187495936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1875f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1876f4259b30SLisandro Dalcin                                        NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
1878f4259b30SLisandro Dalcin                                        NULL,
18793b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1880f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1881f4259b30SLisandro Dalcin                                        NULL,
1882f4259b30SLisandro Dalcin                                        NULL,
1883f4259b30SLisandro Dalcin                                        NULL,
1884f4259b30SLisandro Dalcin                                        NULL,
1885f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1886f4259b30SLisandro Dalcin                                        NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
1889f4259b30SLisandro Dalcin                                        NULL,
1890f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1891f4259b30SLisandro Dalcin                                        NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        NULL,
1895f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1896f4259b30SLisandro Dalcin                                        NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
1899f4259b30SLisandro Dalcin                                        NULL,
1900f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1901f4259b30SLisandro Dalcin                                        NULL,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904f4259b30SLisandro Dalcin                                        NULL,
190546533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1906f4259b30SLisandro Dalcin                                        NULL,
1907f4259b30SLisandro Dalcin                                        NULL,
1908f4259b30SLisandro Dalcin                                        NULL,
1909f4259b30SLisandro Dalcin                                        NULL,
1910d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1911d70f29a3SPierre Jolivet                                        NULL,
1912d70f29a3SPierre Jolivet                                        NULL,
191399a7f59eSMark Adams                                        NULL,
191499a7f59eSMark Adams                                        NULL,
19157fb60732SBarry Smith                                        NULL,
1916dec0b466SHong Zhang                                        /*150*/ NULL,
1917dec0b466SHong Zhang                                        NULL};
1918a30f8f8cSSatish Balay 
1919d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1920d71ae5a4SJacob Faibussowitsch {
1921476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1922535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19235d2a9ed1SStefano Zampini   PetscMPIInt   size;
1924a23d5eceSKris Buschelman 
1925a23d5eceSKris Buschelman   PetscFunctionBegin;
1926ad79cf63SBarry Smith   if (B->hash_active) {
1927aea10558SJacob Faibussowitsch     B->ops[0]      = b->cops;
1928ad79cf63SBarry Smith     B->hash_active = PETSC_FALSE;
1929ad79cf63SBarry Smith   }
1930ad79cf63SBarry Smith   if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
19319566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19329566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19355f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19365f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1937899cda47SBarry Smith 
1938d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1939d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19405f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1941a23d5eceSKris Buschelman 
1942d0f46423SBarry Smith   B->rmap->bs = bs;
1943a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1944a23d5eceSKris Buschelman   b->mbs      = mbs;
1945a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1946de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1947de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1948a23d5eceSKris Buschelman 
1949ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1950d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1951d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1952a23d5eceSKris Buschelman 
1953d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1954d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1955a23d5eceSKris Buschelman 
1956cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
1957eec179cfSJacob Faibussowitsch   PetscCall(PetscHMapIDestroy(&b->colmap));
1958cb7b82ddSBarry Smith #else
19599566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1960cb7b82ddSBarry Smith #endif
19619566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19629566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19639566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19649566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19659566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19669566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19679566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19689566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19699566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1970cb7b82ddSBarry Smith 
19719566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19729566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19739566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19749566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19759566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1976cb7b82ddSBarry Smith 
1977ad79cf63SBarry Smith   PetscCall(MatDestroy(&b->A));
19789566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19799566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19809566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1981a23d5eceSKris Buschelman 
19829566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19839566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
198426fbe8dcSKarl Rupp 
1985526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1986cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1987cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
19883ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
1989a23d5eceSKris Buschelman }
1990a23d5eceSKris Buschelman 
1991d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1992d71ae5a4SJacob Faibussowitsch {
199302106b30SBarry Smith   PetscInt        m, rstart, cend;
1994f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1995f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
1996f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
1997bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
19983bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
1999dfb205c3SBarry Smith 
2000dfb205c3SBarry Smith   PetscFunctionBegin;
20015f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
20029566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
20039566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
20049566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
20059566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
20069566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2007dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2008dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2009dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2010dfb205c3SBarry Smith 
20115f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2013dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2014dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20155f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20160cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2017dfb205c3SBarry Smith     JJ = jj + ii[i];
20180cd7f59aSBarry Smith     bd = 0;
2019dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20200cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2021dfb205c3SBarry Smith       JJ++;
20220cd7f59aSBarry Smith       bd++;
2023dfb205c3SBarry Smith     }
2024dfb205c3SBarry Smith     d = 0;
2025dfb205c3SBarry Smith     for (; j < nz; j++) {
2026dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2027dfb205c3SBarry Smith       d++;
2028dfb205c3SBarry Smith     }
2029dfb205c3SBarry Smith     d_nnz[i] = d;
20300cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20310cd7f59aSBarry Smith     nz       = nz - bd;
20320cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2033dfb205c3SBarry Smith   }
20349566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20359566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20369566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2037dfb205c3SBarry Smith 
2038dfb205c3SBarry Smith   values = (PetscScalar *)V;
203948a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2040dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2041dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2042dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2043dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2044bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2045dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20469566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2047bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2048bb80cfbbSStefano Zampini       PetscInt j;
20490cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20500cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20519566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20520cd7f59aSBarry Smith       }
20530cd7f59aSBarry Smith     }
2054dfb205c3SBarry Smith   }
2055dfb205c3SBarry Smith 
20569566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20573bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20583bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20599566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20613bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20623bd0feecSPierre Jolivet 
20639566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
20643ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2065dfb205c3SBarry Smith }
2066dfb205c3SBarry Smith 
20670bad9183SKris Buschelman /*MC
2068fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2069828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2070828413b8SBarry Smith    the matrix is stored.
2071828413b8SBarry Smith 
2072828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
207311a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20740bad9183SKris Buschelman 
20752ef1f0ffSBarry Smith    Options Database Key:
207611a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20770bad9183SKris Buschelman 
20782ef1f0ffSBarry Smith    Level: beginner
20792ef1f0ffSBarry Smith 
208011a5261eSBarry Smith    Note:
2081476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2082476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2083476417e5SBarry Smith 
20841cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20850bad9183SKris Buschelman M*/
20860bad9183SKris Buschelman 
2087d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2088d71ae5a4SJacob Faibussowitsch {
2089b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
209094ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2091b5df2d14SHong Zhang 
2092b5df2d14SHong Zhang   PetscFunctionBegin;
20934dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2094b0a32e0cSBarry Smith   B->data   = (void *)b;
2095aea10558SJacob Faibussowitsch   B->ops[0] = MatOps_Values;
2096b5df2d14SHong Zhang 
2097b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2098b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2099b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2100b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
210126fbe8dcSKarl Rupp 
21029566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
21039566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2104b5df2d14SHong Zhang 
2105b5df2d14SHong Zhang   /* build local table of row and column ownerships */
21069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2107b5df2d14SHong Zhang 
2108b5df2d14SHong Zhang   /* build cache for off array entries formed */
21099566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
211026fbe8dcSKarl Rupp 
2111b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21120298fd71SBarry Smith   b->colmap      = NULL;
21130298fd71SBarry Smith   b->garray      = NULL;
2114b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2115b5df2d14SHong Zhang 
2116b5df2d14SHong Zhang   /* stuff used in block assembly */
2117f4259b30SLisandro Dalcin   b->barray = NULL;
2118b5df2d14SHong Zhang 
2119b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2120f4259b30SLisandro Dalcin   b->lvec    = NULL;
2121f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2122f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2123f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2124f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2125f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2126f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2127f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2128b5df2d14SHong Zhang 
2129b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2130f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2131f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2132b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2133b5df2d14SHong Zhang 
2134b5df2d14SHong Zhang   /* hash table stuff */
2135f4259b30SLisandro Dalcin   b->ht           = NULL;
2136f4259b30SLisandro Dalcin   b->hd           = NULL;
2137b5df2d14SHong Zhang   b->ht_size      = 0;
2138b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2139b5df2d14SHong Zhang   b->ht_fact      = 0;
2140b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2141b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2142b5df2d14SHong Zhang 
21437dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21447a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21457a868f3eSHong Zhang 
2146f4259b30SLisandro Dalcin   b->in_loc = NULL;
2147f4259b30SLisandro Dalcin   b->v_loc  = NULL;
214859ffdab8SBarry Smith   b->n_loc  = 0;
214994ae4db5SBarry Smith 
21509566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21546214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21566214f412SHong Zhang #endif
2157d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21589566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2159d24d4204SJose E. Roman #endif
21609566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21619566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2162aa5a9175SDahai Guo 
2163b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2164b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2165b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2166b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2167eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2168b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2169eb1ec7c1SStefano Zampini #else
2170b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2171eb1ec7c1SStefano Zampini #endif
217213647f61SHong Zhang 
21739566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2174d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21759566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
217694ae4db5SBarry Smith   if (flg) {
217794ae4db5SBarry Smith     PetscReal fact = 1.39;
21789566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21799566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
218094ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21819566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21829566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
218394ae4db5SBarry Smith   }
2184d0609cedSBarry Smith   PetscOptionsEnd();
21853ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2186b5df2d14SHong Zhang }
2187b5df2d14SHong Zhang 
2188209238afSKris Buschelman /*MC
2189002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2190209238afSKris Buschelman 
219111a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
219211a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2193209238afSKris Buschelman 
219411a5261eSBarry Smith    Options Database Key:
2195c5dec841SPierre Jolivet . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2196209238afSKris Buschelman 
2197209238afSKris Buschelman   Level: beginner
2198209238afSKris Buschelman 
21991cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2200209238afSKris Buschelman M*/
2201209238afSKris Buschelman 
2202b5df2d14SHong Zhang /*@C
2203b5df2d14SHong Zhang   MatMPISBAIJSetPreallocation - For good matrix assembly performance
2204b5df2d14SHong Zhang   the user should preallocate the matrix storage by setting the parameters
2205b5df2d14SHong Zhang   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2206b5df2d14SHong Zhang   performance can be increased by more than a factor of 50.
2207b5df2d14SHong Zhang 
2208c3339decSBarry Smith   Collective
2209b5df2d14SHong Zhang 
2210b5df2d14SHong Zhang   Input Parameters:
22111c4f3114SJed Brown + B     - the matrix
2212bb7ae925SBarry Smith . bs    - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2213bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2214b5df2d14SHong Zhang . d_nz  - number of block nonzeros per block row in diagonal portion of local
2215b5df2d14SHong Zhang            submatrix  (same for all local rows)
2216b5df2d14SHong Zhang . d_nnz - array containing the number of block nonzeros in the various block rows
22176d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22182ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.  If you plan to factor the matrix you must leave room
221995742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2220b5df2d14SHong Zhang . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2221b5df2d14SHong Zhang            submatrix (same for all local rows).
2222b5df2d14SHong Zhang - o_nnz - array containing the number of nonzeros in the various block rows of the
2223c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22242ef1f0ffSBarry Smith            (possibly different for each block row) or `NULL`.
2225b5df2d14SHong Zhang 
2226b5df2d14SHong Zhang   Options Database Keys:
2227a2b725a8SWilliam Gropp + -mat_no_unroll  - uses code that does not unroll the loops in the
2228b5df2d14SHong Zhang                      block calculations (much slower)
2229a2b725a8SWilliam Gropp - -mat_block_size - size of the blocks to use
2230b5df2d14SHong Zhang 
22312ef1f0ffSBarry Smith   Level: intermediate
22322ef1f0ffSBarry Smith 
2233b5df2d14SHong Zhang   Notes:
2234b5df2d14SHong Zhang 
223511a5261eSBarry Smith   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2236b5df2d14SHong Zhang   than it must be used on all processors that share the object for that argument.
2237b5df2d14SHong Zhang 
223849a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
223949a6f317SBarry Smith 
2240b5df2d14SHong Zhang   Storage Information:
2241b5df2d14SHong Zhang   For a square global matrix we define each processor's diagonal portion
2242b5df2d14SHong Zhang   to be its local rows and the corresponding columns (a square submatrix);
2243b5df2d14SHong Zhang   each processor's off-diagonal portion encompasses the remainder of the
2244b5df2d14SHong Zhang   local matrix (a rectangular submatrix).
2245b5df2d14SHong Zhang 
2246b5df2d14SHong Zhang   The user can specify preallocated storage for the diagonal part of
22472ef1f0ffSBarry Smith   the local submatrix with either `d_nz` or `d_nnz` (not both).  Set
22482ef1f0ffSBarry Smith   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2249b5df2d14SHong Zhang   memory allocation.  Likewise, specify preallocated storage for the
22502ef1f0ffSBarry Smith   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2251b5df2d14SHong Zhang 
225211a5261eSBarry Smith   You can call `MatGetInfo()` to get information on how effective the preallocation was;
2253aa95bbe8SBarry Smith   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
22542ef1f0ffSBarry Smith   You can also run with the option `-info` and look for messages with the string
2255aa95bbe8SBarry Smith   malloc in them to see if additional memory allocation was needed.
2256aa95bbe8SBarry Smith 
2257b5df2d14SHong Zhang   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2258b5df2d14SHong Zhang   the figure below we depict these three local rows and all columns (0-11).
2259b5df2d14SHong Zhang 
2260b5df2d14SHong Zhang .vb
2261b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2262a4b1a0f6SJed Brown           --------------------------
2263c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2264c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2265c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2266a4b1a0f6SJed Brown           --------------------------
2267b5df2d14SHong Zhang .ve
2268b5df2d14SHong Zhang 
2269b5df2d14SHong Zhang   Thus, any entries in the d locations are stored in the d (diagonal)
2270b5df2d14SHong Zhang   submatrix, and any entries in the o locations are stored in the
22716d10fdaeSSatish Balay   o (off-diagonal) submatrix.  Note that the d matrix is stored in
227211a5261eSBarry Smith   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2273b5df2d14SHong Zhang 
22742ef1f0ffSBarry Smith   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
22756d10fdaeSSatish Balay   plus the diagonal part of the d matrix,
22762ef1f0ffSBarry Smith   and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2277c2fc9fa9SBarry Smith 
2278b5df2d14SHong Zhang   In general, for PDE problems in which most nonzeros are near the diagonal,
22792ef1f0ffSBarry Smith   one expects `d_nz` >> `o_nz`.
2280b5df2d14SHong Zhang 
22811cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2282b5df2d14SHong Zhang @*/
2283d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2284d71ae5a4SJacob Faibussowitsch {
2285b5df2d14SHong Zhang   PetscFunctionBegin;
22866ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22876ba663aaSJed Brown   PetscValidType(B, 1);
22886ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2289cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
22903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2291b5df2d14SHong Zhang }
2292b5df2d14SHong Zhang 
2293a30f8f8cSSatish Balay /*@C
229411a5261eSBarry Smith   MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2295a30f8f8cSSatish Balay   (block compressed row).  For good matrix assembly performance
2296a30f8f8cSSatish Balay   the user should preallocate the matrix storage by setting the parameters
229720f4b53cSBarry Smith   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2298a30f8f8cSSatish Balay 
2299d083f849SBarry Smith   Collective
2300a30f8f8cSSatish Balay 
2301a30f8f8cSSatish Balay   Input Parameters:
2302a30f8f8cSSatish Balay + comm  - MPI communicator
230311a5261eSBarry Smith . bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
230420f4b53cSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
230520f4b53cSBarry Smith . m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2306a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2307a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
230820f4b53cSBarry Smith . n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2309a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2310a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
231120f4b53cSBarry Smith . M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
231220f4b53cSBarry Smith . N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2313a30f8f8cSSatish Balay . d_nz  - number of block nonzeros per block row in diagonal portion of local
2314a30f8f8cSSatish Balay            submatrix (same for all local rows)
2315a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows
23166d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23172ef1f0ffSBarry Smith            (possibly different for each block block row) or `NULL`.
231895742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
231995742e49SBarry Smith            set its value even if it is zero.
2320a30f8f8cSSatish Balay . o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2321a30f8f8cSSatish Balay            submatrix (same for all local rows).
2322a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the
2323a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23242ef1f0ffSBarry Smith            each block row) or `NULL`.
2325a30f8f8cSSatish Balay 
2326a30f8f8cSSatish Balay   Output Parameter:
2327a30f8f8cSSatish Balay . A - the matrix
2328a30f8f8cSSatish Balay 
2329a30f8f8cSSatish Balay   Options Database Keys:
2330a2b725a8SWilliam Gropp + -mat_no_unroll  - uses code that does not unroll the loops in the
2331a30f8f8cSSatish Balay                      block calculations (much slower)
2332a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use
2333a2b725a8SWilliam Gropp - -mat_mpi        - use the parallel matrix data structures even on one processor
2334a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2335a30f8f8cSSatish Balay 
23362ef1f0ffSBarry Smith   Level: intermediate
23372ef1f0ffSBarry Smith 
23382ef1f0ffSBarry Smith   Notes:
233911a5261eSBarry Smith   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2340f6f02116SRichard Tran Mills   MatXXXXSetPreallocation() paradigm instead of this routine directly.
234111a5261eSBarry Smith   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2342175b88e8SBarry Smith 
2343d1be2dadSMatthew Knepley   The number of rows and columns must be divisible by blocksize.
23446d6d819aSHong Zhang   This matrix type does not support complex Hermitian operation.
2345d1be2dadSMatthew Knepley 
2346a30f8f8cSSatish Balay   The user MUST specify either the local or global matrix dimensions
2347a30f8f8cSSatish Balay   (possibly both).
2348a30f8f8cSSatish Balay 
234911a5261eSBarry Smith   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2350a30f8f8cSSatish Balay   than it must be used on all processors that share the object for that argument.
2351a30f8f8cSSatish Balay 
235249a6f317SBarry Smith   If the *_nnz parameter is given then the *_nz parameter is ignored
235349a6f317SBarry Smith 
2354a30f8f8cSSatish Balay   Storage Information:
2355a30f8f8cSSatish Balay   For a square global matrix we define each processor's diagonal portion
2356a30f8f8cSSatish Balay   to be its local rows and the corresponding columns (a square submatrix);
2357a30f8f8cSSatish Balay   each processor's off-diagonal portion encompasses the remainder of the
2358a30f8f8cSSatish Balay   local matrix (a rectangular submatrix).
2359a30f8f8cSSatish Balay 
2360a30f8f8cSSatish Balay   The user can specify preallocated storage for the diagonal part of
23612ef1f0ffSBarry Smith   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
23622ef1f0ffSBarry Smith   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2363a30f8f8cSSatish Balay   memory allocation. Likewise, specify preallocated storage for the
23642ef1f0ffSBarry Smith   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2365a30f8f8cSSatish Balay 
2366a30f8f8cSSatish Balay   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2367a30f8f8cSSatish Balay   the figure below we depict these three local rows and all columns (0-11).
2368a30f8f8cSSatish Balay 
2369a30f8f8cSSatish Balay .vb
2370a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2371a4b1a0f6SJed Brown           --------------------------
2372c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2373c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2374c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2375a4b1a0f6SJed Brown           --------------------------
2376a30f8f8cSSatish Balay .ve
2377a30f8f8cSSatish Balay 
2378a30f8f8cSSatish Balay   Thus, any entries in the d locations are stored in the d (diagonal)
2379a30f8f8cSSatish Balay   submatrix, and any entries in the o locations are stored in the
23806d10fdaeSSatish Balay   o (off-diagonal) submatrix. Note that the d matrix is stored in
23812ef1f0ffSBarry Smith   `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2382a30f8f8cSSatish Balay 
23832ef1f0ffSBarry Smith   Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
23846d10fdaeSSatish Balay   plus the diagonal part of the d matrix,
23852ef1f0ffSBarry Smith   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2386a30f8f8cSSatish Balay   In general, for PDE problems in which most nonzeros are near the diagonal,
23872ef1f0ffSBarry Smith   one expects `d_nz` >> `o_nz`.
2388a30f8f8cSSatish Balay 
23891cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2390a30f8f8cSSatish Balay @*/
2391d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2392d71ae5a4SJacob Faibussowitsch {
23931302d50aSBarry Smith   PetscMPIInt size;
2394a30f8f8cSSatish Balay 
2395a30f8f8cSSatish Balay   PetscFunctionBegin;
23969566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2399273d9f13SBarry Smith   if (size > 1) {
24009566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
24019566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2402273d9f13SBarry Smith   } else {
24039566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
24049566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2405273d9f13SBarry Smith   }
24063ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2407a30f8f8cSSatish Balay }
2408a30f8f8cSSatish Balay 
2409d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2410d71ae5a4SJacob Faibussowitsch {
2411a30f8f8cSSatish Balay   Mat           mat;
2412a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2413d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2414387bc808SHong Zhang   PetscScalar  *array;
2415a30f8f8cSSatish Balay 
2416a30f8f8cSSatish Balay   PetscFunctionBegin;
2417f4259b30SLisandro Dalcin   *newmat = NULL;
241826fbe8dcSKarl Rupp 
24199566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24209566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24219566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2424e1b6402fSHong Zhang 
2425d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2426273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
242782327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24287fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24297fff6886SHong Zhang 
2430b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2431a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2432a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2433a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2434a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2435a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2436a30f8f8cSSatish Balay 
2437a30f8f8cSSatish Balay   a->size         = oldmat->size;
2438a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2439a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2440a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2441f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2442f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2443a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2444f4259b30SLisandro Dalcin   a->barray       = NULL;
2445899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2446899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2447899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2448899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2449a30f8f8cSSatish Balay 
2450a30f8f8cSSatish Balay   /* hash table stuff */
2451f4259b30SLisandro Dalcin   a->ht           = NULL;
2452f4259b30SLisandro Dalcin   a->hd           = NULL;
2453a30f8f8cSSatish Balay   a->ht_size      = 0;
2454a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2455a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2456a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2457a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2458a30f8f8cSSatish Balay 
24599566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2460a30f8f8cSSatish Balay   if (oldmat->colmap) {
2461a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2462eec179cfSJacob Faibussowitsch     PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2463a30f8f8cSSatish Balay #else
24649566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24659566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2466a30f8f8cSSatish Balay #endif
2467f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2468387bc808SHong Zhang 
2469a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24709566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24719566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2472f4259b30SLisandro Dalcin   } else a->garray = NULL;
2473a30f8f8cSSatish Balay 
24749566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24759566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24769566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
247782327fa8SHong Zhang 
24789566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24799566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2480387bc808SHong Zhang 
24819566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24829566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24839566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24849566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24869566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24879566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
24889566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2489387bc808SHong Zhang 
2490387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
24919566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2492387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
249382327fa8SHong Zhang 
24949566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
24959566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
24969566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2497a30f8f8cSSatish Balay   *newmat = mat;
24983ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2499a30f8f8cSSatish Balay }
2500a30f8f8cSSatish Balay 
2501618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2502618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2503618cc2edSLisandro Dalcin 
2504d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2505d71ae5a4SJacob Faibussowitsch {
25067f489da9SVaclav Hapla   PetscBool isbinary;
250795936485SShri Abhyankar 
250895936485SShri Abhyankar   PetscFunctionBegin;
25099566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25105f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25119566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
25123ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
251395936485SShri Abhyankar }
251495936485SShri Abhyankar 
2515d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2516d71ae5a4SJacob Faibussowitsch {
251724d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2518f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2519ca54ac64SHong Zhang   PetscReal     atmp;
252087828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25211302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25221302d50aSBarry Smith   PetscMPIInt   rank, size;
25231302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
252487828ca2SBarry Smith   PetscScalar  *va;
25258a1c53f2SBarry Smith   MatScalar    *ba;
2526f4c0e9e4SHong Zhang   MPI_Status    stat;
252724d5174aSHong Zhang 
252824d5174aSHong Zhang   PetscFunctionBegin;
25295f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25309566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25319566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2532f4c0e9e4SHong Zhang 
25339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25349566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2535f4c0e9e4SHong Zhang 
2536d0f46423SBarry Smith   bs  = A->rmap->bs;
2537f4c0e9e4SHong Zhang   mbs = a->mbs;
2538f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2539f4c0e9e4SHong Zhang   ba  = b->a;
2540f4c0e9e4SHong Zhang   bi  = b->i;
2541f4c0e9e4SHong Zhang   bj  = b->j;
2542f4c0e9e4SHong Zhang 
2543f4c0e9e4SHong Zhang   /* find ownerships */
2544d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2545f4c0e9e4SHong Zhang 
2546f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25479566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2548f4c0e9e4SHong Zhang 
2549f4c0e9e4SHong Zhang   /* row_max for B */
2550b8475685SHong Zhang   if (rank != size - 1) {
2551f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25529371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25539371c9d4SSatish Balay       bi++;
2554f4c0e9e4SHong Zhang       brow = bs * i;
2555f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2556f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2557f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2558ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
255904d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2560f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25619371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25629371c9d4SSatish Balay             ba++;
2563ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2564ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2565f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2566f4c0e9e4SHong Zhang           }
2567f4c0e9e4SHong Zhang         }
2568f4c0e9e4SHong Zhang         bj++;
2569f4c0e9e4SHong Zhang       }
2570f4c0e9e4SHong Zhang     }
2571f4c0e9e4SHong Zhang 
2572f4c0e9e4SHong Zhang     /* send values to its owners */
2573f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2574f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2575ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25769566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2577ca54ac64SHong Zhang     }
2578f4c0e9e4SHong Zhang   }
2579f4c0e9e4SHong Zhang 
2580f4c0e9e4SHong Zhang   /* receive values */
2581ca54ac64SHong Zhang   if (rank) {
2582f4c0e9e4SHong Zhang     rvalues = work;
2583ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2584f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25859566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2586f4c0e9e4SHong Zhang       /* process values */
2587f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2588ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2589f4c0e9e4SHong Zhang       }
2590f4c0e9e4SHong Zhang     }
2591ca54ac64SHong Zhang   }
2592f4c0e9e4SHong Zhang 
25939566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
25949566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
25953ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
259624d5174aSHong Zhang }
25972798e883SHong Zhang 
2598d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2599d71ae5a4SJacob Faibussowitsch {
26002798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2601d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
26023649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2603ffe4fb16SHong Zhang   Vec                bb1;
26043649974fSBarry Smith   const PetscScalar *b;
2605ffe4fb16SHong Zhang 
2606ffe4fb16SHong Zhang   PetscFunctionBegin;
26075f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26085f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2609ffe4fb16SHong Zhang 
2610a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26119566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
26123ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
2613a2b30743SBarry Smith   }
2614a2b30743SBarry Smith 
2615ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2616ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26179566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2618ffe4fb16SHong Zhang       its--;
2619ffe4fb16SHong Zhang     }
2620ffe4fb16SHong Zhang 
26219566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2622ffe4fb16SHong Zhang     while (its--) {
2623ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26249566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2625ffe4fb16SHong Zhang 
2626ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26279566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26289566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26299566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26309566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2631ffe4fb16SHong Zhang 
26329566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2633ffe4fb16SHong Zhang 
2634ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26359566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26369566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26379566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26389566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2639ffe4fb16SHong Zhang 
2640ffe4fb16SHong Zhang       /* set slvec1b = 0 */
2641629a200eSBarry Smith       PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2642629a200eSBarry Smith       PetscCall(VecZeroEntries(mat->slvec1b));
2643ffe4fb16SHong Zhang 
26449566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26459566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26469566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26479566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2648ffe4fb16SHong Zhang 
2649ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26509566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2651ffe4fb16SHong Zhang 
2652ffe4fb16SHong Zhang       /* local diagonal sweep */
26539566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2654ffe4fb16SHong Zhang     }
26559566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2656fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26579566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2658fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26599566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2660fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2661fa22f6d0SBarry Smith     Vec                xx1;
2662ace3abfcSBarry Smith     PetscBool          hasop;
266320f1ed55SBarry Smith     const PetscScalar *diag;
2664887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
266520f1ed55SBarry Smith     PetscInt           i, n;
2666fa22f6d0SBarry Smith 
2667fa22f6d0SBarry Smith     if (!mat->xx1) {
26689566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26699566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2670fa22f6d0SBarry Smith     }
2671fa22f6d0SBarry Smith     xx1 = mat->xx1;
2672fa22f6d0SBarry Smith     bb1 = mat->bb1;
2673fa22f6d0SBarry Smith 
26749566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2675fa22f6d0SBarry Smith 
2676fa22f6d0SBarry Smith     if (!mat->diag) {
2677effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26789566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26799566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2680fa22f6d0SBarry Smith     }
26819566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2682fa22f6d0SBarry Smith 
2683fa22f6d0SBarry Smith     if (hasop) {
26849566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26859566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
268620f1ed55SBarry Smith     } else {
268720f1ed55SBarry Smith       /*
268820f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
26899566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
26909566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
269120f1ed55SBarry Smith       */
26929566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
26939566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
26949566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26959566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26969566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2697887ee2caSBarry Smith       if (omega == 1.0) {
269826fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
26999566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2700887ee2caSBarry Smith       } else {
270126fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
27029566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2703887ee2caSBarry Smith       }
27049566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
27059566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27069566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27079566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
270820f1ed55SBarry Smith     }
2709fa22f6d0SBarry Smith 
2710fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
2711629a200eSBarry Smith     PetscCall(PetscObjectStateIncrease((PetscObject)mat->slvec1b));
2712629a200eSBarry Smith     PetscCall(VecZeroEntries(mat->slvec1b));
27139566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27149566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27159566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27169566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27179566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27189566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27199566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27209566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27219566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2722fa22f6d0SBarry Smith 
2723fa22f6d0SBarry Smith     /* local sweep */
27249566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27259566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2726f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
27273ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2728ffe4fb16SHong Zhang }
2729ffe4fb16SHong Zhang 
2730dfb205c3SBarry Smith /*@
273111a5261eSBarry Smith   MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2732dfb205c3SBarry Smith   CSR format the local rows.
2733dfb205c3SBarry Smith 
2734d083f849SBarry Smith   Collective
2735dfb205c3SBarry Smith 
2736dfb205c3SBarry Smith   Input Parameters:
2737dfb205c3SBarry Smith + comm - MPI communicator
2738dfb205c3SBarry Smith . bs   - the block size, only a block size of 1 is supported
273911a5261eSBarry Smith . m    - number of local rows (Cannot be `PETSC_DECIDE`)
2740dfb205c3SBarry Smith . n    - This value should be the same as the local size used in creating the
274111a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
27422ef1f0ffSBarry Smith        calculated if `N` is given) For square matrices `n` is almost always `m`.
27432ef1f0ffSBarry Smith . M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
27442ef1f0ffSBarry Smith . N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2745483a2f95SBarry Smith . i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2746dfb205c3SBarry Smith . j    - column indices
2747dfb205c3SBarry Smith - a    - matrix values
2748dfb205c3SBarry Smith 
2749dfb205c3SBarry Smith   Output Parameter:
2750dfb205c3SBarry Smith . mat - the matrix
2751dfb205c3SBarry Smith 
2752dfb205c3SBarry Smith   Level: intermediate
2753dfb205c3SBarry Smith 
2754dfb205c3SBarry Smith   Notes:
27552ef1f0ffSBarry Smith   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
27562ef1f0ffSBarry Smith   thus you CANNOT change the matrix entries by changing the values of `a` after you have
27572ef1f0ffSBarry Smith   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2758dfb205c3SBarry Smith 
27592ef1f0ffSBarry Smith   The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.
2760dfb205c3SBarry Smith 
27611cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2762db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2763dfb205c3SBarry Smith @*/
2764d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2765d71ae5a4SJacob Faibussowitsch {
2766dfb205c3SBarry Smith   PetscFunctionBegin;
27675f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27685f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27699566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27709566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27719566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27729566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
27733ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2774dfb205c3SBarry Smith }
2775dfb205c3SBarry Smith 
2776dfb205c3SBarry Smith /*@C
277711a5261eSBarry Smith   MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2778dfb205c3SBarry Smith 
2779d083f849SBarry Smith   Collective
2780dfb205c3SBarry Smith 
2781dfb205c3SBarry Smith   Input Parameters:
27821c4f3114SJed Brown + B  - the matrix
2783dfb205c3SBarry Smith . bs - the block size
27842ef1f0ffSBarry Smith . i  - the indices into `j` for the start of each local row (starts with zero)
2785dfb205c3SBarry Smith . j  - the column indices for each local row (starts with zero) these must be sorted for each row
2786dfb205c3SBarry Smith - v  - optional values in the matrix
2787dfb205c3SBarry Smith 
2788664954b6SBarry Smith   Level: advanced
2789664954b6SBarry Smith 
2790664954b6SBarry Smith   Notes:
27910cd7f59aSBarry Smith   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
27920cd7f59aSBarry Smith   and usually the numerical values as well
27930cd7f59aSBarry Smith 
279450c5228eSBarry Smith   Any entries below the diagonal are ignored
2795dfb205c3SBarry Smith 
27961cc06b55SBarry Smith .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2797dfb205c3SBarry Smith @*/
2798d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2799d71ae5a4SJacob Faibussowitsch {
2800dfb205c3SBarry Smith   PetscFunctionBegin;
2801cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
28023ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
2803dfb205c3SBarry Smith }
2804dfb205c3SBarry Smith 
2805d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2806d71ae5a4SJacob Faibussowitsch {
280710c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
280810c56fdeSHong Zhang   PetscInt    *indx;
280910c56fdeSHong Zhang   PetscScalar *values;
2810dfb205c3SBarry Smith 
28114dcd73b1SHong Zhang   PetscFunctionBegin;
28129566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
281310c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
281410c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2815de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
281610c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2817de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28184dcd73b1SHong Zhang 
28199566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28209371c9d4SSatish Balay     mbs = m / bs;
28219371c9d4SSatish Balay     Nbs = N / cbs;
282248a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2823da91a574SPierre Jolivet     nbs = n / cbs;
28244dcd73b1SHong Zhang 
28259566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2826d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2827de25e9cbSPierre Jolivet 
28289566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28299566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2830de25e9cbSPierre Jolivet     if (rank == size - 1) {
2831de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28325f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2833de25e9cbSPierre Jolivet     }
2834de25e9cbSPierre Jolivet 
2835d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28369566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
283710c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28389566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28394dcd73b1SHong Zhang       nnz = nnz / bs;
28404dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28419566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28429566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28434dcd73b1SHong Zhang     }
28449566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28459566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28464dcd73b1SHong Zhang 
28479566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28489566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28499566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28509566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28519566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28529566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2853d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28544dcd73b1SHong Zhang   }
28554dcd73b1SHong Zhang 
285610c56fdeSHong Zhang   /* numeric phase */
28579566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28589566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28594dcd73b1SHong Zhang 
28609566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28614dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28629566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28634dcd73b1SHong Zhang     Ii = i + rstart;
28649566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28659566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28664dcd73b1SHong Zhang   }
28679566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28699566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28703ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
28714dcd73b1SHong Zhang }
2872