xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1a30f8f8cSSatish Balay 
2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/
3c6db04a5SJed Brown #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4c6db04a5SJed Brown #include <../src/mat/impls/sbaij/seq/sbaij.h>
5c6db04a5SJed Brown #include <petscblaslapack.h>
6a30f8f8cSSatish Balay 
76214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
8cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
96214f412SHong Zhang #endif
10d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
11d24d4204SJose E. Roman PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
12d24d4204SJose E. Roman #endif
13b147fbf3SStefano Zampini 
14b147fbf3SStefano Zampini /* This could be moved to matimpl.h */
15*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16*d71ae5a4SJacob Faibussowitsch {
17b147fbf3SStefano Zampini   Mat       preallocator;
18b147fbf3SStefano Zampini   PetscInt  r, rstart, rend;
19b147fbf3SStefano Zampini   PetscInt  bs, i, m, n, M, N;
20b147fbf3SStefano Zampini   PetscBool cong = PETSC_TRUE;
21b147fbf3SStefano Zampini 
22b147fbf3SStefano Zampini   PetscFunctionBegin;
23b147fbf3SStefano Zampini   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
24b147fbf3SStefano Zampini   PetscValidLogicalCollectiveInt(B, nm, 2);
25b147fbf3SStefano Zampini   for (i = 0; i < nm; i++) {
26b147fbf3SStefano Zampini     PetscValidHeaderSpecific(X[i], MAT_CLASSID, 3);
279566063dSJacob Faibussowitsch     PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
285f80ce2aSJacob Faibussowitsch     PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
29b147fbf3SStefano Zampini   }
30b147fbf3SStefano Zampini   PetscValidLogicalCollectiveBool(B, fill, 5);
319566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(B, &bs));
329566063dSJacob Faibussowitsch   PetscCall(MatGetSize(B, &M, &N));
339566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(B, &m, &n));
349566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
359566063dSJacob Faibussowitsch   PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
369566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(preallocator, bs));
379566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(preallocator, m, n, M, N));
389566063dSJacob Faibussowitsch   PetscCall(MatSetUp(preallocator));
399566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
40b147fbf3SStefano Zampini   for (r = rstart; r < rend; ++r) {
41b147fbf3SStefano Zampini     PetscInt           ncols;
42b147fbf3SStefano Zampini     const PetscInt    *row;
43b147fbf3SStefano Zampini     const PetscScalar *vals;
44b147fbf3SStefano Zampini 
45b147fbf3SStefano Zampini     for (i = 0; i < nm; i++) {
469566063dSJacob Faibussowitsch       PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
479566063dSJacob Faibussowitsch       PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
4848a46eb9SPierre Jolivet       if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
499566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
50b147fbf3SStefano Zampini     }
51b147fbf3SStefano Zampini   }
529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
549566063dSJacob Faibussowitsch   PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
559566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&preallocator));
56b147fbf3SStefano Zampini   PetscFunctionReturn(0);
57b147fbf3SStefano Zampini }
58b147fbf3SStefano Zampini 
59*d71ae5a4SJacob Faibussowitsch PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60*d71ae5a4SJacob Faibussowitsch {
61b147fbf3SStefano Zampini   Mat      B;
62b147fbf3SStefano Zampini   PetscInt r;
63b147fbf3SStefano Zampini 
64b147fbf3SStefano Zampini   PetscFunctionBegin;
65b147fbf3SStefano Zampini   if (reuse != MAT_REUSE_MATRIX) {
6628d58a37SPierre Jolivet     PetscBool symm = PETSC_TRUE, isdense;
67b147fbf3SStefano Zampini     PetscInt  bs;
68b147fbf3SStefano Zampini 
699566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
709566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
719566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, newtype));
729566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSize(A, &bs));
739566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSize(B, bs));
749566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->rmap));
759566063dSJacob Faibussowitsch     PetscCall(PetscLayoutSetUp(B->cmap));
769566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
7728d58a37SPierre Jolivet     if (!isdense) {
789566063dSJacob Faibussowitsch       PetscCall(MatGetRowUpperTriangular(A));
799566063dSJacob Faibussowitsch       PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
809566063dSJacob Faibussowitsch       PetscCall(MatRestoreRowUpperTriangular(A));
8128d58a37SPierre Jolivet     } else {
829566063dSJacob Faibussowitsch       PetscCall(MatSetUp(B));
8328d58a37SPierre Jolivet     }
8428d58a37SPierre Jolivet   } else {
8528d58a37SPierre Jolivet     B = *newmat;
869566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(B));
8728d58a37SPierre Jolivet   }
88b147fbf3SStefano Zampini 
899566063dSJacob Faibussowitsch   PetscCall(MatGetRowUpperTriangular(A));
90b147fbf3SStefano Zampini   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
91b147fbf3SStefano Zampini     PetscInt           ncols;
92b147fbf3SStefano Zampini     const PetscInt    *row;
93b147fbf3SStefano Zampini     const PetscScalar *vals;
94b147fbf3SStefano Zampini 
959566063dSJacob Faibussowitsch     PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
969566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
97eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
98b94d7dedSBarry Smith     if (A->hermitian == PETSC_BOOL3_TRUE) {
99eb1ec7c1SStefano Zampini       PetscInt i;
10048a46eb9SPierre Jolivet       for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
101eb1ec7c1SStefano Zampini     } else {
1029566063dSJacob Faibussowitsch       PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
103eb1ec7c1SStefano Zampini     }
104eb1ec7c1SStefano Zampini #else
1059566063dSJacob Faibussowitsch     PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
106eb1ec7c1SStefano Zampini #endif
1079566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
108b147fbf3SStefano Zampini   }
1099566063dSJacob Faibussowitsch   PetscCall(MatRestoreRowUpperTriangular(A));
1109566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
1119566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
112b147fbf3SStefano Zampini 
113b147fbf3SStefano Zampini   if (reuse == MAT_INPLACE_MATRIX) {
1149566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
115b147fbf3SStefano Zampini   } else {
116b147fbf3SStefano Zampini     *newmat = B;
117b147fbf3SStefano Zampini   }
118b147fbf3SStefano Zampini   PetscFunctionReturn(0);
119b147fbf3SStefano Zampini }
120b147fbf3SStefano Zampini 
121*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
122*d71ae5a4SJacob Faibussowitsch {
123f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
124a30f8f8cSSatish Balay 
125a30f8f8cSSatish Balay   PetscFunctionBegin;
1269566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
1279566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
128a30f8f8cSSatish Balay   PetscFunctionReturn(0);
129a30f8f8cSSatish Balay }
130a30f8f8cSSatish Balay 
131*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
132*d71ae5a4SJacob Faibussowitsch {
133f3566a2aSHong Zhang   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
134a30f8f8cSSatish Balay 
135a30f8f8cSSatish Balay   PetscFunctionBegin;
1369566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
1379566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
138a30f8f8cSSatish Balay   PetscFunctionReturn(0);
139a30f8f8cSSatish Balay }
140a30f8f8cSSatish Balay 
141d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
142a30f8f8cSSatish Balay   { \
143a30f8f8cSSatish Balay     brow = row / bs; \
1449371c9d4SSatish Balay     rp   = aj + ai[brow]; \
1459371c9d4SSatish Balay     ap   = aa + bs2 * ai[brow]; \
1469371c9d4SSatish Balay     rmax = aimax[brow]; \
1479371c9d4SSatish Balay     nrow = ailen[brow]; \
148a30f8f8cSSatish Balay     bcol = col / bs; \
1499371c9d4SSatish Balay     ridx = row % bs; \
1509371c9d4SSatish Balay     cidx = col % bs; \
1519371c9d4SSatish Balay     low  = 0; \
1529371c9d4SSatish Balay     high = nrow; \
153a30f8f8cSSatish Balay     while (high - low > 3) { \
154a30f8f8cSSatish Balay       t = (low + high) / 2; \
155a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
156a30f8f8cSSatish Balay       else low = t; \
157a30f8f8cSSatish Balay     } \
158a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
159a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
160a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
161a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
162a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
163a30f8f8cSSatish Balay         else *bap = value; \
164a30f8f8cSSatish Balay         goto a_noinsert; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay     } \
167a30f8f8cSSatish Balay     if (a->nonew == 1) goto a_noinsert; \
1685f80ce2aSJacob Faibussowitsch     PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
170a30f8f8cSSatish Balay     N = nrow++ - 1; \
171a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
1729566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
1739566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
1749566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
175a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
176a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
177e56f5c9eSBarry Smith     A->nonzerostate++; \
178a30f8f8cSSatish Balay   a_noinsert:; \
179a30f8f8cSSatish Balay     ailen[brow] = nrow; \
180a30f8f8cSSatish Balay   }
181e5e170daSBarry Smith 
182d40312a9SBarry Smith #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
183a30f8f8cSSatish Balay   { \
184a30f8f8cSSatish Balay     brow = row / bs; \
1859371c9d4SSatish Balay     rp   = bj + bi[brow]; \
1869371c9d4SSatish Balay     ap   = ba + bs2 * bi[brow]; \
1879371c9d4SSatish Balay     rmax = bimax[brow]; \
1889371c9d4SSatish Balay     nrow = bilen[brow]; \
189a30f8f8cSSatish Balay     bcol = col / bs; \
1909371c9d4SSatish Balay     ridx = row % bs; \
1919371c9d4SSatish Balay     cidx = col % bs; \
1929371c9d4SSatish Balay     low  = 0; \
1939371c9d4SSatish Balay     high = nrow; \
194a30f8f8cSSatish Balay     while (high - low > 3) { \
195a30f8f8cSSatish Balay       t = (low + high) / 2; \
196a30f8f8cSSatish Balay       if (rp[t] > bcol) high = t; \
197a30f8f8cSSatish Balay       else low = t; \
198a30f8f8cSSatish Balay     } \
199a30f8f8cSSatish Balay     for (_i = low; _i < high; _i++) { \
200a30f8f8cSSatish Balay       if (rp[_i] > bcol) break; \
201a30f8f8cSSatish Balay       if (rp[_i] == bcol) { \
202a30f8f8cSSatish Balay         bap = ap + bs2 * _i + bs * cidx + ridx; \
203a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *bap += value; \
204a30f8f8cSSatish Balay         else *bap = value; \
205a30f8f8cSSatish Balay         goto b_noinsert; \
206a30f8f8cSSatish Balay       } \
207a30f8f8cSSatish Balay     } \
208a30f8f8cSSatish Balay     if (b->nonew == 1) goto b_noinsert; \
2095f80ce2aSJacob Faibussowitsch     PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
210fef13f97SBarry Smith     MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
211a30f8f8cSSatish Balay     N = nrow++ - 1; \
212a30f8f8cSSatish Balay     /* shift up all the later entries in this row */ \
2139566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
2149566063dSJacob Faibussowitsch     PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
2159566063dSJacob Faibussowitsch     PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
216a30f8f8cSSatish Balay     rp[_i]                          = bcol; \
217a30f8f8cSSatish Balay     ap[bs2 * _i + bs * cidx + ridx] = value; \
218e56f5c9eSBarry Smith     B->nonzerostate++; \
219a30f8f8cSSatish Balay   b_noinsert:; \
220a30f8f8cSSatish Balay     bilen[brow] = nrow; \
221a30f8f8cSSatish Balay   }
222a30f8f8cSSatish Balay 
223a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
224476417e5SBarry Smith    Any a(i,j) with i>j input by user is ingored or generates an error
225a30f8f8cSSatish Balay */
226*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
227*d71ae5a4SJacob Faibussowitsch {
228a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
229a30f8f8cSSatish Balay   MatScalar     value;
230ace3abfcSBarry Smith   PetscBool     roworiented = baij->roworiented;
2311302d50aSBarry Smith   PetscInt      i, j, row, col;
232d0f46423SBarry Smith   PetscInt      rstart_orig = mat->rmap->rstart;
233d0f46423SBarry Smith   PetscInt      rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
234d0f46423SBarry Smith   PetscInt      cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
235a30f8f8cSSatish Balay 
236a30f8f8cSSatish Balay   /* Some Variables required in the macro */
237a30f8f8cSSatish Balay   Mat           A     = baij->A;
238a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a     = (Mat_SeqSBAIJ *)(A)->data;
2391302d50aSBarry Smith   PetscInt     *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
240a30f8f8cSSatish Balay   MatScalar    *aa = a->a;
241a30f8f8cSSatish Balay 
242a30f8f8cSSatish Balay   Mat          B     = baij->B;
243a30f8f8cSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
2441302d50aSBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
245a30f8f8cSSatish Balay   MatScalar   *ba = b->a;
246a30f8f8cSSatish Balay 
2471302d50aSBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
2481302d50aSBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
249a30f8f8cSSatish Balay   MatScalar *ap, *bap;
250a30f8f8cSSatish Balay 
251a30f8f8cSSatish Balay   /* for stash */
2520298fd71SBarry Smith   PetscInt   n_loc, *in_loc = NULL;
2530298fd71SBarry Smith   MatScalar *v_loc = NULL;
254a30f8f8cSSatish Balay 
255a30f8f8cSSatish Balay   PetscFunctionBegin;
256a30f8f8cSSatish Balay   if (!baij->donotstash) {
25759ffdab8SBarry Smith     if (n > baij->n_loc) {
2589566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->in_loc));
2599566063dSJacob Faibussowitsch       PetscCall(PetscFree(baij->v_loc));
2609566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->in_loc));
2619566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(n, &baij->v_loc));
26226fbe8dcSKarl Rupp 
26359ffdab8SBarry Smith       baij->n_loc = n;
26459ffdab8SBarry Smith     }
26559ffdab8SBarry Smith     in_loc = baij->in_loc;
26659ffdab8SBarry Smith     v_loc  = baij->v_loc;
267a30f8f8cSSatish Balay   }
268a30f8f8cSSatish Balay 
269a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
270a30f8f8cSSatish Balay     if (im[i] < 0) continue;
2715f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
272a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273a30f8f8cSSatish Balay       row = im[i] - rstart_orig;                     /* local row index */
274a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
27501b2bd88SHong Zhang         if (im[i] / bs > in[j] / bs) {
27601b2bd88SHong Zhang           if (a->ignore_ltriangular) {
27701b2bd88SHong Zhang             continue; /* ignore lower triangular blocks */
27826fbe8dcSKarl Rupp           } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
27901b2bd88SHong Zhang         }
280a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
281a30f8f8cSSatish Balay           col  = in[j] - cstart_orig;                    /* local col index */
2829371c9d4SSatish Balay           brow = row / bs;
2839371c9d4SSatish Balay           bcol = col / bs;
284a30f8f8cSSatish Balay           if (brow > bcol) continue; /* ignore lower triangular blocks of A */
285db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
286db4deed7SKarl Rupp           else value = v[i + j * m];
287d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
2889566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
289f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
290f7d195e4SLawrence Mitchell           continue;
291f7d195e4SLawrence Mitchell         } else {
292f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
293f7d195e4SLawrence Mitchell           /* off-diag entry (B) */
294a30f8f8cSSatish Balay           if (mat->was_assembled) {
29548a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
296a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
2979566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
29871730473SSatish Balay             col = col - 1;
299a30f8f8cSSatish Balay #else
30071730473SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
301a30f8f8cSSatish Balay #endif
302a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
3039566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
304a30f8f8cSSatish Balay               col = in[j];
305a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
306a30f8f8cSSatish Balay               B     = baij->B;
307a30f8f8cSSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
3089371c9d4SSatish Balay               bimax = b->imax;
3099371c9d4SSatish Balay               bi    = b->i;
3109371c9d4SSatish Balay               bilen = b->ilen;
3119371c9d4SSatish Balay               bj    = b->j;
312a30f8f8cSSatish Balay               ba    = b->a;
31371730473SSatish Balay             } else col += in[j] % bs;
314a30f8f8cSSatish Balay           } else col = in[j];
315db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
316db4deed7SKarl Rupp           else value = v[i + j * m];
317d40312a9SBarry Smith           MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
3189566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
319a30f8f8cSSatish Balay         }
320a30f8f8cSSatish Balay       }
321a30f8f8cSSatish Balay     } else { /* off processor entry */
3225f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
323a30f8f8cSSatish Balay       if (!baij->donotstash) {
3245080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
325a30f8f8cSSatish Balay         n_loc          = 0;
326a30f8f8cSSatish Balay         for (j = 0; j < n; j++) {
327f65c83cfSHong Zhang           if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
328a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
329a30f8f8cSSatish Balay           if (roworiented) {
330a30f8f8cSSatish Balay             v_loc[n_loc] = v[i * n + j];
331a30f8f8cSSatish Balay           } else {
332a30f8f8cSSatish Balay             v_loc[n_loc] = v[j * m + i];
333a30f8f8cSSatish Balay           }
334a30f8f8cSSatish Balay           n_loc++;
335a30f8f8cSSatish Balay         }
3369566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
337a30f8f8cSSatish Balay       }
338a30f8f8cSSatish Balay     }
339a30f8f8cSSatish Balay   }
340a30f8f8cSSatish Balay   PetscFunctionReturn(0);
341a30f8f8cSSatish Balay }
342a30f8f8cSSatish Balay 
343*d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
344*d71ae5a4SJacob Faibussowitsch {
34536bd2089SBarry Smith   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
34636bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
34736bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
34836bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
34936bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
35036bd2089SBarry Smith   const PetscScalar *value       = v;
35136bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
35236bd2089SBarry Smith 
35336bd2089SBarry Smith   PetscFunctionBegin;
35436bd2089SBarry Smith   if (col < row) {
35536bd2089SBarry Smith     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
35636bd2089SBarry Smith     else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
35736bd2089SBarry Smith   }
35836bd2089SBarry Smith   rp    = aj + ai[row];
35936bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
36036bd2089SBarry Smith   rmax  = imax[row];
36136bd2089SBarry Smith   nrow  = ailen[row];
36236bd2089SBarry Smith   value = v;
36336bd2089SBarry Smith   low   = 0;
36436bd2089SBarry Smith   high  = nrow;
36536bd2089SBarry Smith 
36636bd2089SBarry Smith   while (high - low > 7) {
36736bd2089SBarry Smith     t = (low + high) / 2;
36836bd2089SBarry Smith     if (rp[t] > col) high = t;
36936bd2089SBarry Smith     else low = t;
37036bd2089SBarry Smith   }
37136bd2089SBarry Smith   for (i = low; i < high; i++) {
37236bd2089SBarry Smith     if (rp[i] > col) break;
37336bd2089SBarry Smith     if (rp[i] == col) {
37436bd2089SBarry Smith       bap = ap + bs2 * i;
37536bd2089SBarry Smith       if (roworiented) {
37636bd2089SBarry Smith         if (is == ADD_VALUES) {
37736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
378ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
37936bd2089SBarry Smith           }
38036bd2089SBarry Smith         } else {
38136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
382ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
38336bd2089SBarry Smith           }
38436bd2089SBarry Smith         }
38536bd2089SBarry Smith       } else {
38636bd2089SBarry Smith         if (is == ADD_VALUES) {
38736bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
388ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ += *value++;
38936bd2089SBarry Smith           }
39036bd2089SBarry Smith         } else {
39136bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
392ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *bap++ = *value++;
39336bd2089SBarry Smith           }
39436bd2089SBarry Smith         }
39536bd2089SBarry Smith       }
39636bd2089SBarry Smith       goto noinsert2;
39736bd2089SBarry Smith     }
39836bd2089SBarry Smith   }
39936bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4005f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
40136bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4029371c9d4SSatish Balay   N = nrow++ - 1;
4039371c9d4SSatish Balay   high++;
40436bd2089SBarry Smith   /* shift up all the later entries in this row */
4059566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4069566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
40736bd2089SBarry Smith   rp[i] = col;
40836bd2089SBarry Smith   bap   = ap + bs2 * i;
40936bd2089SBarry Smith   if (roworiented) {
41036bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
411ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
41236bd2089SBarry Smith     }
41336bd2089SBarry Smith   } else {
41436bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
415ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
41636bd2089SBarry Smith     }
41736bd2089SBarry Smith   }
41836bd2089SBarry Smith noinsert2:;
41936bd2089SBarry Smith   ailen[row] = nrow;
42036bd2089SBarry Smith   PetscFunctionReturn(0);
42136bd2089SBarry Smith }
42236bd2089SBarry Smith 
42336bd2089SBarry Smith /*
42436bd2089SBarry Smith    This routine is exactly duplicated in mpibaij.c
42536bd2089SBarry Smith */
426*d71ae5a4SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
427*d71ae5a4SJacob Faibussowitsch {
42836bd2089SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
42936bd2089SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
43036bd2089SBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
43136bd2089SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
43236bd2089SBarry Smith   PetscBool          roworiented = a->roworiented;
43336bd2089SBarry Smith   const PetscScalar *value       = v;
43436bd2089SBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
43536bd2089SBarry Smith 
43636bd2089SBarry Smith   PetscFunctionBegin;
43736bd2089SBarry Smith   rp    = aj + ai[row];
43836bd2089SBarry Smith   ap    = aa + bs2 * ai[row];
43936bd2089SBarry Smith   rmax  = imax[row];
44036bd2089SBarry Smith   nrow  = ailen[row];
44136bd2089SBarry Smith   low   = 0;
44236bd2089SBarry Smith   high  = nrow;
44336bd2089SBarry Smith   value = v;
44436bd2089SBarry Smith   while (high - low > 7) {
44536bd2089SBarry Smith     t = (low + high) / 2;
44636bd2089SBarry Smith     if (rp[t] > col) high = t;
44736bd2089SBarry Smith     else low = t;
44836bd2089SBarry Smith   }
44936bd2089SBarry Smith   for (i = low; i < high; i++) {
45036bd2089SBarry Smith     if (rp[i] > col) break;
45136bd2089SBarry Smith     if (rp[i] == col) {
45236bd2089SBarry Smith       bap = ap + bs2 * i;
45336bd2089SBarry Smith       if (roworiented) {
45436bd2089SBarry Smith         if (is == ADD_VALUES) {
45536bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
456ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
45736bd2089SBarry Smith           }
45836bd2089SBarry Smith         } else {
45936bd2089SBarry Smith           for (ii = 0; ii < bs; ii++) {
460ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
46136bd2089SBarry Smith           }
46236bd2089SBarry Smith         }
46336bd2089SBarry Smith       } else {
46436bd2089SBarry Smith         if (is == ADD_VALUES) {
46536bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
466ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
46736bd2089SBarry Smith             bap += bs;
46836bd2089SBarry Smith           }
46936bd2089SBarry Smith         } else {
47036bd2089SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
471ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
47236bd2089SBarry Smith             bap += bs;
47336bd2089SBarry Smith           }
47436bd2089SBarry Smith         }
47536bd2089SBarry Smith       }
47636bd2089SBarry Smith       goto noinsert2;
47736bd2089SBarry Smith     }
47836bd2089SBarry Smith   }
47936bd2089SBarry Smith   if (nonew == 1) goto noinsert2;
4805f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
48136bd2089SBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
4829371c9d4SSatish Balay   N = nrow++ - 1;
4839371c9d4SSatish Balay   high++;
48436bd2089SBarry Smith   /* shift up all the later entries in this row */
4859566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
4869566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
48736bd2089SBarry Smith   rp[i] = col;
48836bd2089SBarry Smith   bap   = ap + bs2 * i;
48936bd2089SBarry Smith   if (roworiented) {
49036bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
491ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
49236bd2089SBarry Smith     }
49336bd2089SBarry Smith   } else {
49436bd2089SBarry Smith     for (ii = 0; ii < bs; ii++) {
495ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
49636bd2089SBarry Smith     }
49736bd2089SBarry Smith   }
49836bd2089SBarry Smith noinsert2:;
49936bd2089SBarry Smith   ailen[row] = nrow;
50036bd2089SBarry Smith   PetscFunctionReturn(0);
50136bd2089SBarry Smith }
50236bd2089SBarry Smith 
50336bd2089SBarry Smith /*
50436bd2089SBarry Smith     This routine could be optimized by removing the need for the block copy below and passing stride information
50536bd2089SBarry Smith   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
50636bd2089SBarry Smith */
507*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
508*d71ae5a4SJacob Faibussowitsch {
5090880e062SHong Zhang   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ *)mat->data;
510f15d580aSBarry Smith   const MatScalar *value;
511f15d580aSBarry Smith   MatScalar       *barray      = baij->barray;
512ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
513899cda47SBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
514476417e5SBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
515476417e5SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
5160880e062SHong Zhang 
517a30f8f8cSSatish Balay   PetscFunctionBegin;
5180880e062SHong Zhang   if (!barray) {
5199566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
5200880e062SHong Zhang     baij->barray = barray;
5210880e062SHong Zhang   }
5220880e062SHong Zhang 
5230880e062SHong Zhang   if (roworiented) {
5240880e062SHong Zhang     stepval = (n - 1) * bs;
5250880e062SHong Zhang   } else {
5260880e062SHong Zhang     stepval = (m - 1) * bs;
5270880e062SHong Zhang   }
5280880e062SHong Zhang   for (i = 0; i < m; i++) {
5290880e062SHong Zhang     if (im[i] < 0) continue;
5306bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
5310880e062SHong Zhang     if (im[i] >= rstart && im[i] < rend) {
5320880e062SHong Zhang       row = im[i] - rstart;
5330880e062SHong Zhang       for (j = 0; j < n; j++) {
534f3f98c53SJed Brown         if (im[i] > in[j]) {
535f3f98c53SJed Brown           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
536e32f2f54SBarry Smith           else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
537f3f98c53SJed Brown         }
5380880e062SHong Zhang         /* If NumCol = 1 then a copy is not required */
5390880e062SHong Zhang         if ((roworiented) && (n == 1)) {
540f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
5410880e062SHong Zhang         } else if ((!roworiented) && (m == 1)) {
542f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
5430880e062SHong Zhang         } else { /* Here a copy is required */
5440880e062SHong Zhang           if (roworiented) {
5450880e062SHong Zhang             value = v + i * (stepval + bs) * bs + j * bs;
5460880e062SHong Zhang           } else {
5470880e062SHong Zhang             value = v + j * (stepval + bs) * bs + i * bs;
5480880e062SHong Zhang           }
5490880e062SHong Zhang           for (ii = 0; ii < bs; ii++, value += stepval) {
550ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
5510880e062SHong Zhang           }
5520880e062SHong Zhang           barray -= bs2;
5530880e062SHong Zhang         }
5540880e062SHong Zhang 
5550880e062SHong Zhang         if (in[j] >= cstart && in[j] < cend) {
5560880e062SHong Zhang           col = in[j] - cstart;
5579566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
558f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
559f7d195e4SLawrence Mitchell           continue;
560f7d195e4SLawrence Mitchell         } else {
561f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
5620880e062SHong Zhang           if (mat->was_assembled) {
56348a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
5640880e062SHong Zhang 
5652515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
5660880e062SHong Zhang   #if defined(PETSC_USE_CTABLE)
5679371c9d4SSatish Balay             {
5689371c9d4SSatish Balay               PetscInt data;
5699566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
57008401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5710880e062SHong Zhang             }
5720880e062SHong Zhang   #else
57308401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
5740880e062SHong Zhang   #endif
5750880e062SHong Zhang #endif
5760880e062SHong Zhang #if defined(PETSC_USE_CTABLE)
5779566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
5780880e062SHong Zhang             col = (col - 1) / bs;
5790880e062SHong Zhang #else
5800880e062SHong Zhang             col = (baij->colmap[in[j]] - 1) / bs;
5810880e062SHong Zhang #endif
5820880e062SHong Zhang             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
5839566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPISBAIJ(mat));
5840880e062SHong Zhang               col = in[j];
5850880e062SHong Zhang             }
58626fbe8dcSKarl Rupp           } else col = in[j];
5879566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
5880880e062SHong Zhang         }
5890880e062SHong Zhang       }
5900880e062SHong Zhang     } else {
5915f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
5920880e062SHong Zhang       if (!baij->donotstash) {
5930880e062SHong Zhang         if (roworiented) {
5949566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5950880e062SHong Zhang         } else {
5969566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5970880e062SHong Zhang         }
5980880e062SHong Zhang       }
5990880e062SHong Zhang     }
6000880e062SHong Zhang   }
6010880e062SHong Zhang   PetscFunctionReturn(0);
602a30f8f8cSSatish Balay }
603a30f8f8cSSatish Balay 
604*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
605*d71ae5a4SJacob Faibussowitsch {
606f3566a2aSHong Zhang   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
607d0f46423SBarry Smith   PetscInt      bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
608d0f46423SBarry Smith   PetscInt      bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
609a30f8f8cSSatish Balay 
610a30f8f8cSSatish Balay   PetscFunctionBegin;
611a30f8f8cSSatish Balay   for (i = 0; i < m; i++) {
61254c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
61354c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
614a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
615a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
616a30f8f8cSSatish Balay       for (j = 0; j < n; j++) {
61754c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
61854c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
619a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
620a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
6219566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
622a30f8f8cSSatish Balay         } else {
62348a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
624a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
6259566063dSJacob Faibussowitsch           PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
626a30f8f8cSSatish Balay           data--;
627a30f8f8cSSatish Balay #else
628a30f8f8cSSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
629a30f8f8cSSatish Balay #endif
630a30f8f8cSSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
631a30f8f8cSSatish Balay           else {
632a30f8f8cSSatish Balay             col = data + idxn[j] % bs;
6339566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
634a30f8f8cSSatish Balay           }
635a30f8f8cSSatish Balay         }
636a30f8f8cSSatish Balay       }
637f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
638a30f8f8cSSatish Balay   }
639a30f8f8cSSatish Balay   PetscFunctionReturn(0);
640a30f8f8cSSatish Balay }
641a30f8f8cSSatish Balay 
642*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
643*d71ae5a4SJacob Faibussowitsch {
644a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
645a30f8f8cSSatish Balay   PetscReal     sum[2], *lnorm2;
646a30f8f8cSSatish Balay 
647a30f8f8cSSatish Balay   PetscFunctionBegin;
648a30f8f8cSSatish Balay   if (baij->size == 1) {
6499566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, norm));
650a30f8f8cSSatish Balay   } else {
651a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
6529566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(2, &lnorm2));
6539566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->A, type, lnorm2));
6549371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6559371c9d4SSatish Balay       lnorm2++; /* squar power of norm(A) */
6569566063dSJacob Faibussowitsch       PetscCall(MatNorm(baij->B, type, lnorm2));
6579371c9d4SSatish Balay       *lnorm2 = (*lnorm2) * (*lnorm2);
6589371c9d4SSatish Balay       lnorm2--; /* squar power of norm(B) */
6591c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
6608f1a2a5eSBarry Smith       *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
6619566063dSJacob Faibussowitsch       PetscCall(PetscFree(lnorm2));
6620b8dc8d2SHong Zhang     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
6630b8dc8d2SHong Zhang       Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
6640b8dc8d2SHong Zhang       Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ *)baij->B->data;
6650b8dc8d2SHong Zhang       PetscReal    *rsum, *rsum2, vabs;
666899cda47SBarry Smith       PetscInt     *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
667d0f46423SBarry Smith       PetscInt      brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
6680b8dc8d2SHong Zhang       MatScalar    *v;
6690b8dc8d2SHong Zhang 
6709566063dSJacob Faibussowitsch       PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
6719566063dSJacob Faibussowitsch       PetscCall(PetscArrayzero(rsum, mat->cmap->N));
6720b8dc8d2SHong Zhang       /* Amat */
6739371c9d4SSatish Balay       v  = amat->a;
6749371c9d4SSatish Balay       jj = amat->j;
6750b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6760b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6770b8dc8d2SHong Zhang         nz   = amat->i[brow + 1] - amat->i[brow];
6780b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
6799371c9d4SSatish Balay           gcol = bs * (rstart + *jj);
6809371c9d4SSatish Balay           jj++;
6810b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
6820b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
6839371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
6849371c9d4SSatish Balay               v++;
6850b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
6860b8dc8d2SHong Zhang               /* non-diagonal block */
6870b8dc8d2SHong Zhang               if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
6880b8dc8d2SHong Zhang             }
6890b8dc8d2SHong Zhang           }
6900b8dc8d2SHong Zhang         }
6919566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
6920b8dc8d2SHong Zhang       }
6930b8dc8d2SHong Zhang       /* Bmat */
6949371c9d4SSatish Balay       v  = bmat->a;
6959371c9d4SSatish Balay       jj = bmat->j;
6960b8dc8d2SHong Zhang       for (brow = 0; brow < mbs; brow++) {
6970b8dc8d2SHong Zhang         grow = bs * (rstart + brow);
6980b8dc8d2SHong Zhang         nz   = bmat->i[brow + 1] - bmat->i[brow];
6990b8dc8d2SHong Zhang         for (bcol = 0; bcol < nz; bcol++) {
7009371c9d4SSatish Balay           gcol = bs * garray[*jj];
7019371c9d4SSatish Balay           jj++;
7020b8dc8d2SHong Zhang           for (col = 0; col < bs; col++) {
7030b8dc8d2SHong Zhang             for (row = 0; row < bs; row++) {
7049371c9d4SSatish Balay               vabs = PetscAbsScalar(*v);
7059371c9d4SSatish Balay               v++;
7060b8dc8d2SHong Zhang               rsum[gcol + col] += vabs;
7070b8dc8d2SHong Zhang               rsum[grow + row] += vabs;
7080b8dc8d2SHong Zhang             }
7090b8dc8d2SHong Zhang           }
7100b8dc8d2SHong Zhang         }
7119566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(nz * bs * bs));
7120b8dc8d2SHong Zhang       }
7131c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
7140b8dc8d2SHong Zhang       *norm = 0.0;
715d0f46423SBarry Smith       for (col = 0; col < mat->cmap->N; col++) {
7160b8dc8d2SHong Zhang         if (rsum2[col] > *norm) *norm = rsum2[col];
7170b8dc8d2SHong Zhang       }
7189566063dSJacob Faibussowitsch       PetscCall(PetscFree2(rsum, rsum2));
719f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
720a30f8f8cSSatish Balay   }
721a30f8f8cSSatish Balay   PetscFunctionReturn(0);
722a30f8f8cSSatish Balay }
723a30f8f8cSSatish Balay 
724*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
725*d71ae5a4SJacob Faibussowitsch {
726a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
7271302d50aSBarry Smith   PetscInt      nstash, reallocs;
728a30f8f8cSSatish Balay 
729a30f8f8cSSatish Balay   PetscFunctionBegin;
73026fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
731a30f8f8cSSatish Balay 
7329566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
7339566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
7349566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7359566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
7369566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
7379566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
738a30f8f8cSSatish Balay   PetscFunctionReturn(0);
739a30f8f8cSSatish Balay }
740a30f8f8cSSatish Balay 
741*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
742*d71ae5a4SJacob Faibussowitsch {
743a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
744a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ *)baij->A->data;
74513f74950SBarry Smith   PetscInt      i, j, rstart, ncols, flg, bs2 = baij->bs2;
746e44c0bd4SBarry Smith   PetscInt     *row, *col;
747ace3abfcSBarry Smith   PetscBool     other_disassembled;
74813f74950SBarry Smith   PetscMPIInt   n;
749ace3abfcSBarry Smith   PetscBool     r1, r2, r3;
750a30f8f8cSSatish Balay   MatScalar    *val;
751a30f8f8cSSatish Balay 
75291c97fd4SSatish Balay   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
753a30f8f8cSSatish Balay   PetscFunctionBegin;
7544cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
755a30f8f8cSSatish Balay     while (1) {
7569566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
757a30f8f8cSSatish Balay       if (!flg) break;
758a30f8f8cSSatish Balay 
759a30f8f8cSSatish Balay       for (i = 0; i < n;) {
760a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
76126fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
76226fbe8dcSKarl Rupp           if (row[j] != rstart) break;
76326fbe8dcSKarl Rupp         }
764a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
765a30f8f8cSSatish Balay         else ncols = n - i;
766a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
7679566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
768a30f8f8cSSatish Balay         i = j;
769a30f8f8cSSatish Balay       }
770a30f8f8cSSatish Balay     }
7719566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
772a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
773a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
774a30f8f8cSSatish Balay        restore the original flags */
775a30f8f8cSSatish Balay     r1 = baij->roworiented;
776a30f8f8cSSatish Balay     r2 = a->roworiented;
77791c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
77826fbe8dcSKarl Rupp 
779a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
780a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
78126fbe8dcSKarl Rupp 
78291c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
783a30f8f8cSSatish Balay     while (1) {
7849566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
785a30f8f8cSSatish Balay       if (!flg) break;
786a30f8f8cSSatish Balay 
787a30f8f8cSSatish Balay       for (i = 0; i < n;) {
788a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
78926fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
79026fbe8dcSKarl Rupp           if (row[j] != rstart) break;
79126fbe8dcSKarl Rupp         }
792a30f8f8cSSatish Balay         if (j < n) ncols = j - i;
793a30f8f8cSSatish Balay         else ncols = n - i;
7949566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
795a30f8f8cSSatish Balay         i = j;
796a30f8f8cSSatish Balay       }
797a30f8f8cSSatish Balay     }
7989566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
79926fbe8dcSKarl Rupp 
800a30f8f8cSSatish Balay     baij->roworiented = r1;
801a30f8f8cSSatish Balay     a->roworiented    = r2;
80226fbe8dcSKarl Rupp 
80391c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworinted */
804a30f8f8cSSatish Balay   }
805a30f8f8cSSatish Balay 
8069566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
8079566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
808a30f8f8cSSatish Balay 
809a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
8106aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
811a30f8f8cSSatish Balay   /*
812a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
813a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
814a30f8f8cSSatish Balay   */
815a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
8165f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
81748a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
818a30f8f8cSSatish Balay   }
819a30f8f8cSSatish Balay 
8209371c9d4SSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
8219566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
8229566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
823a30f8f8cSSatish Balay 
8249566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
82526fbe8dcSKarl Rupp 
826f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
8274f9cfa9eSBarry Smith 
8284f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
8294f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
830e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
8311c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
832e56f5c9eSBarry Smith   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836dd6ea824SBarry Smith extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
8379804daf3SBarry Smith #include <petscdraw.h>
838*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
839*d71ae5a4SJacob Faibussowitsch {
840a30f8f8cSSatish Balay   Mat_MPISBAIJ     *baij = (Mat_MPISBAIJ *)mat->data;
841d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
8427da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
843ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
844b0a32e0cSBarry Smith   PetscViewer       sviewer;
845f3ef73ceSBarry Smith   PetscViewerFormat format;
846a30f8f8cSSatish Balay 
847a30f8f8cSSatish Balay   PetscFunctionBegin;
8489566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
8499566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
85032077d6dSBarry Smith   if (iascii) {
8519566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
852456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
853a30f8f8cSSatish Balay       MatInfo info;
8549566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
8559566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
8569566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
8579371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
8589371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
8599566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
8609566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8619566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
8629566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
8639566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
8649566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
8659566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
8669566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
867a30f8f8cSSatish Balay       PetscFunctionReturn(0);
868fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
8699566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
870a30f8f8cSSatish Balay       PetscFunctionReturn(0);
871c1490034SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
872c1490034SHong Zhang       PetscFunctionReturn(0);
873a30f8f8cSSatish Balay     }
874a30f8f8cSSatish Balay   }
875a30f8f8cSSatish Balay 
876a30f8f8cSSatish Balay   if (isdraw) {
877b0a32e0cSBarry Smith     PetscDraw draw;
878ace3abfcSBarry Smith     PetscBool isnull;
8799566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
8809566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
88145f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
882a30f8f8cSSatish Balay   }
883a30f8f8cSSatish Balay 
8847da1fb6eSBarry Smith   {
885a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
886a30f8f8cSSatish Balay     Mat           A;
88765d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
88865d70643SHong Zhang     Mat_SeqBAIJ  *Bloc;
889d0f46423SBarry Smith     PetscInt      M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
890a30f8f8cSSatish Balay     MatScalar    *a;
8913e219373SBarry Smith     const char   *matname;
892a30f8f8cSSatish Balay 
893f204ca49SKris Buschelman     /* Should this be the same type as mat? */
8949566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
895dd400576SPatrick Sanan     if (rank == 0) {
8969566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
897a30f8f8cSSatish Balay     } else {
8989566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
899a30f8f8cSSatish Balay     }
9009566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPISBAIJ));
9019566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
9029566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
903a30f8f8cSSatish Balay 
904a30f8f8cSSatish Balay     /* copy over the A part */
90565d70643SHong Zhang     Aloc = (Mat_SeqSBAIJ *)baij->A->data;
9069371c9d4SSatish Balay     ai   = Aloc->i;
9079371c9d4SSatish Balay     aj   = Aloc->j;
9089371c9d4SSatish Balay     a    = Aloc->a;
9099566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
910a30f8f8cSSatish Balay 
911a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
912e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
91326fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
914a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
915e9f7bc9eSHong Zhang         col = (baij->cstartbs + aj[j]) * bs;
916a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9179566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
91826fbe8dcSKarl Rupp           col++;
91926fbe8dcSKarl Rupp           a += bs;
920a30f8f8cSSatish Balay         }
921a30f8f8cSSatish Balay       }
922a30f8f8cSSatish Balay     }
923a30f8f8cSSatish Balay     /* copy over the B part */
92465d70643SHong Zhang     Bloc = (Mat_SeqBAIJ *)baij->B->data;
9259371c9d4SSatish Balay     ai   = Bloc->i;
9269371c9d4SSatish Balay     aj   = Bloc->j;
9279371c9d4SSatish Balay     a    = Bloc->a;
928a30f8f8cSSatish Balay     for (i = 0; i < mbs; i++) {
929e9f7bc9eSHong Zhang       rvals[0] = bs * (baij->rstartbs + i);
93026fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
931a30f8f8cSSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
932a30f8f8cSSatish Balay         col = baij->garray[aj[j]] * bs;
933a30f8f8cSSatish Balay         for (k = 0; k < bs; k++) {
9349566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
93526fbe8dcSKarl Rupp           col++;
93626fbe8dcSKarl Rupp           a += bs;
937a30f8f8cSSatish Balay         }
938a30f8f8cSSatish Balay       }
939a30f8f8cSSatish Balay     }
9409566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
9419566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
9429566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
943a30f8f8cSSatish Balay     /*
944a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
945b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
946a30f8f8cSSatish Balay     */
9479566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
9489566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
949dd400576SPatrick Sanan     if (rank == 0) {
9509566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
9519566063dSJacob Faibussowitsch       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
952a30f8f8cSSatish Balay     }
9539566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
9549566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
9559566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
956a30f8f8cSSatish Balay   }
957a30f8f8cSSatish Balay   PetscFunctionReturn(0);
958a30f8f8cSSatish Balay }
959a30f8f8cSSatish Balay 
960618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
961618cc2edSLisandro Dalcin #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
962d1654148SHong Zhang 
963*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
964*d71ae5a4SJacob Faibussowitsch {
965ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
966a30f8f8cSSatish Balay 
967a30f8f8cSSatish Balay   PetscFunctionBegin;
9689566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
9709566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
9719566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
972d1654148SHong Zhang   if (iascii || isdraw || issocket) {
9739566063dSJacob Faibussowitsch     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
9741baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
975a30f8f8cSSatish Balay   PetscFunctionReturn(0);
976a30f8f8cSSatish Balay }
977a30f8f8cSSatish Balay 
978*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
979*d71ae5a4SJacob Faibussowitsch {
980a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
981a30f8f8cSSatish Balay 
982a30f8f8cSSatish Balay   PetscFunctionBegin;
983a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
984c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
985a30f8f8cSSatish Balay #endif
9869566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
9879566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->bstash));
9889566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->A));
9899566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->B));
990a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
9919566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&baij->colmap));
992a30f8f8cSSatish Balay #else
9939566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->colmap));
994a30f8f8cSSatish Balay #endif
9959566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
9969566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
9979566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
9989566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0));
9999566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec0b));
10009566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1));
10019566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1a));
10029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->slvec1b));
10039566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->sMvctx));
10049566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
10059566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->barray));
10069566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->hd));
10079566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->diag));
10089566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->bb1));
10099566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->xx1));
1010ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
10119566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->setvaluescopy));
1012a30f8f8cSSatish Balay #endif
10139566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->in_loc));
10149566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->v_loc));
10159566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->rangebs));
10169566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1017901853e0SKris Buschelman 
10189566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
10199566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
10209566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
10219566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
10229566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
10236214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
10249566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
10256214f412SHong Zhang #endif
1026d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
10279566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
1028d24d4204SJose E. Roman #endif
10299566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
10309566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
1031a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1032a30f8f8cSSatish Balay }
1033a30f8f8cSSatish Balay 
1034*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1035*d71ae5a4SJacob Faibussowitsch {
1036547795f9SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1037eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
10386de40e93SBarry Smith   PetscScalar       *from;
10396de40e93SBarry Smith   const PetscScalar *x;
1040547795f9SHong Zhang 
1041547795f9SHong Zhang   PetscFunctionBegin;
1042547795f9SHong Zhang   /* diagonal part */
10439566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10449566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1045547795f9SHong Zhang 
1046547795f9SHong Zhang   /* subdiagonal part */
10475f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
10489566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1049547795f9SHong Zhang 
1050547795f9SHong Zhang   /* copy x into the vec slvec0 */
10519566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10529566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1053547795f9SHong Zhang 
10549566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10559566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10569566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1057547795f9SHong Zhang 
10589566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10599566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1060547795f9SHong Zhang   /* supperdiagonal part */
10619566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1062547795f9SHong Zhang   PetscFunctionReturn(0);
1063547795f9SHong Zhang }
1064547795f9SHong Zhang 
1065*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1066*d71ae5a4SJacob Faibussowitsch {
1067a9d4b620SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1068eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1069d9ca1df4SBarry Smith   PetscScalar       *from;
1070d9ca1df4SBarry Smith   const PetscScalar *x;
1071a9d4b620SHong Zhang 
1072a9d4b620SHong Zhang   PetscFunctionBegin;
1073a9d4b620SHong Zhang   /* diagonal part */
10749566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
10759566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, 0.0));
1076a9d4b620SHong Zhang 
1077a9d4b620SHong Zhang   /* subdiagonal part */
10789566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1079fc165ae2SBarry Smith 
1080a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
10819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
10829566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
1083a9d4b620SHong Zhang 
10849566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
10859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
10869566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
1087fc165ae2SBarry Smith 
10889566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
10899566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1090a9d4b620SHong Zhang   /* supperdiagonal part */
10919566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1092a9d4b620SHong Zhang   PetscFunctionReturn(0);
1093a9d4b620SHong Zhang }
1094a9d4b620SHong Zhang 
1095*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1096*d71ae5a4SJacob Faibussowitsch {
1097eb1ec7c1SStefano Zampini   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1098eb1ec7c1SStefano Zampini   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1099eb1ec7c1SStefano Zampini   PetscScalar       *from, zero       = 0.0;
1100eb1ec7c1SStefano Zampini   const PetscScalar *x;
1101eb1ec7c1SStefano Zampini 
1102eb1ec7c1SStefano Zampini   PetscFunctionBegin;
1103eb1ec7c1SStefano Zampini   /* diagonal part */
11049566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11059566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1106eb1ec7c1SStefano Zampini 
1107eb1ec7c1SStefano Zampini   /* subdiagonal part */
11085f80ce2aSJacob Faibussowitsch   PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
11099566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1110eb1ec7c1SStefano Zampini 
1111eb1ec7c1SStefano Zampini   /* copy x into the vec slvec0 */
11129566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11139566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11149566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11159566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1116eb1ec7c1SStefano Zampini 
11179566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11189566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11199566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1120eb1ec7c1SStefano Zampini 
1121eb1ec7c1SStefano Zampini   /* supperdiagonal part */
11229566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1123eb1ec7c1SStefano Zampini   PetscFunctionReturn(0);
1124eb1ec7c1SStefano Zampini }
1125eb1ec7c1SStefano Zampini 
1126*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1127*d71ae5a4SJacob Faibussowitsch {
1128de8b6608SHong Zhang   Mat_MPISBAIJ      *a   = (Mat_MPISBAIJ *)A->data;
1129d0f46423SBarry Smith   PetscInt           mbs = a->mbs, bs = A->rmap->bs;
1130d9ca1df4SBarry Smith   PetscScalar       *from, zero       = 0.0;
1131d9ca1df4SBarry Smith   const PetscScalar *x;
1132a9d4b620SHong Zhang 
1133a9d4b620SHong Zhang   PetscFunctionBegin;
1134a9d4b620SHong Zhang   /* diagonal part */
11359566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
11369566063dSJacob Faibussowitsch   PetscCall(VecSet(a->slvec1b, zero));
1137a9d4b620SHong Zhang 
1138a9d4b620SHong Zhang   /* subdiagonal part */
11399566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1140a9d4b620SHong Zhang 
1141a9d4b620SHong Zhang   /* copy x into the vec slvec0 */
11429566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &from));
11439566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(xx, &x));
11449566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(from, x, bs * mbs));
11459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &from));
1146a9d4b620SHong Zhang 
11479566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
11489566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(xx, &x));
11499566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1150a9d4b620SHong Zhang 
1151a9d4b620SHong Zhang   /* supperdiagonal part */
11529566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1153a9d4b620SHong Zhang   PetscFunctionReturn(0);
1154a9d4b620SHong Zhang }
1155a9d4b620SHong Zhang 
1156a30f8f8cSSatish Balay /*
1157a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1158a30f8f8cSSatish Balay    diagonal block
1159a30f8f8cSSatish Balay */
1160*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1161*d71ae5a4SJacob Faibussowitsch {
1162a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1163a30f8f8cSSatish Balay 
1164a30f8f8cSSatish Balay   PetscFunctionBegin;
116508401ef6SPierre Jolivet   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
11669566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(a->A, v));
1167a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1168a30f8f8cSSatish Balay }
1169a30f8f8cSSatish Balay 
1170*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1171*d71ae5a4SJacob Faibussowitsch {
1172a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1173a30f8f8cSSatish Balay 
1174a30f8f8cSSatish Balay   PetscFunctionBegin;
11759566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
11769566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
1177a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1178a30f8f8cSSatish Balay }
1179a30f8f8cSSatish Balay 
1180*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1181*d71ae5a4SJacob Faibussowitsch {
1182d0d4cfc2SHong Zhang   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1183d0d4cfc2SHong Zhang   PetscScalar  *vworkA, *vworkB, **pvA, **pvB, *v_p;
1184d0f46423SBarry Smith   PetscInt      bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1185d0f46423SBarry Smith   PetscInt      nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1186899cda47SBarry Smith   PetscInt     *cmap, *idx_p, cstart = mat->rstartbs;
1187d0d4cfc2SHong Zhang 
1188a30f8f8cSSatish Balay   PetscFunctionBegin;
11895f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1190d0d4cfc2SHong Zhang   mat->getrowactive = PETSC_TRUE;
1191d0d4cfc2SHong Zhang 
1192d0d4cfc2SHong Zhang   if (!mat->rowvalues && (idx || v)) {
1193d0d4cfc2SHong Zhang     /*
1194d0d4cfc2SHong Zhang         allocate enough space to hold information from the longest row.
1195d0d4cfc2SHong Zhang     */
1196d0d4cfc2SHong Zhang     Mat_SeqSBAIJ *Aa  = (Mat_SeqSBAIJ *)mat->A->data;
1197d0d4cfc2SHong Zhang     Mat_SeqBAIJ  *Ba  = (Mat_SeqBAIJ *)mat->B->data;
1198d0d4cfc2SHong Zhang     PetscInt      max = 1, mbs = mat->mbs, tmp;
1199d0d4cfc2SHong Zhang     for (i = 0; i < mbs; i++) {
1200d0d4cfc2SHong Zhang       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
120126fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1202d0d4cfc2SHong Zhang     }
12039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1204d0d4cfc2SHong Zhang   }
1205d0d4cfc2SHong Zhang 
12065f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1207d0d4cfc2SHong Zhang   lrow = row - brstart; /* local row index */
1208d0d4cfc2SHong Zhang 
12099371c9d4SSatish Balay   pvA = &vworkA;
12109371c9d4SSatish Balay   pcA = &cworkA;
12119371c9d4SSatish Balay   pvB = &vworkB;
12129371c9d4SSatish Balay   pcB = &cworkB;
12139371c9d4SSatish Balay   if (!v) {
12149371c9d4SSatish Balay     pvA = NULL;
12159371c9d4SSatish Balay     pvB = NULL;
12169371c9d4SSatish Balay   }
12179371c9d4SSatish Balay   if (!idx) {
12189371c9d4SSatish Balay     pcA = NULL;
12199371c9d4SSatish Balay     if (!v) pcB = NULL;
12209371c9d4SSatish Balay   }
12219566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
12229566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1223d0d4cfc2SHong Zhang   nztot = nzA + nzB;
1224d0d4cfc2SHong Zhang 
1225d0d4cfc2SHong Zhang   cmap = mat->garray;
1226d0d4cfc2SHong Zhang   if (v || idx) {
1227d0d4cfc2SHong Zhang     if (nztot) {
1228d0d4cfc2SHong Zhang       /* Sort by increasing column numbers, assuming A and B already sorted */
1229d0d4cfc2SHong Zhang       PetscInt imark = -1;
1230d0d4cfc2SHong Zhang       if (v) {
1231d0d4cfc2SHong Zhang         *v = v_p = mat->rowvalues;
1232d0d4cfc2SHong Zhang         for (i = 0; i < nzB; i++) {
1233d0d4cfc2SHong Zhang           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1234d0d4cfc2SHong Zhang           else break;
1235d0d4cfc2SHong Zhang         }
1236d0d4cfc2SHong Zhang         imark = i;
1237d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1238d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1239d0d4cfc2SHong Zhang       }
1240d0d4cfc2SHong Zhang       if (idx) {
1241d0d4cfc2SHong Zhang         *idx = idx_p = mat->rowindices;
1242d0d4cfc2SHong Zhang         if (imark > -1) {
1243ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1244d0d4cfc2SHong Zhang         } else {
1245d0d4cfc2SHong Zhang           for (i = 0; i < nzB; i++) {
124626fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1247d0d4cfc2SHong Zhang             else break;
1248d0d4cfc2SHong Zhang           }
1249d0d4cfc2SHong Zhang           imark = i;
1250d0d4cfc2SHong Zhang         }
1251d0d4cfc2SHong Zhang         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1252d0d4cfc2SHong Zhang         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253d0d4cfc2SHong Zhang       }
1254d0d4cfc2SHong Zhang     } else {
1255f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1256f4259b30SLisandro Dalcin       if (v) *v = NULL;
1257d0d4cfc2SHong Zhang     }
1258d0d4cfc2SHong Zhang   }
1259d0d4cfc2SHong Zhang   *nz = nztot;
12609566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
12619566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1262a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1263a30f8f8cSSatish Balay }
1264a30f8f8cSSatish Balay 
1265*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1266*d71ae5a4SJacob Faibussowitsch {
1267a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1268a30f8f8cSSatish Balay 
1269a30f8f8cSSatish Balay   PetscFunctionBegin;
12705f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1271a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1272a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1273a30f8f8cSSatish Balay }
1274a30f8f8cSSatish Balay 
1275*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1276*d71ae5a4SJacob Faibussowitsch {
1277d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1278d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1279d0d4cfc2SHong Zhang 
1280d0d4cfc2SHong Zhang   PetscFunctionBegin;
1281d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_TRUE;
1282d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1283d0d4cfc2SHong Zhang }
1284*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1285*d71ae5a4SJacob Faibussowitsch {
1286d0d4cfc2SHong Zhang   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1287d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1288d0d4cfc2SHong Zhang 
1289d0d4cfc2SHong Zhang   PetscFunctionBegin;
1290d0d4cfc2SHong Zhang   aA->getrow_utriangular = PETSC_FALSE;
1291d0d4cfc2SHong Zhang   PetscFunctionReturn(0);
1292d0d4cfc2SHong Zhang }
1293d0d4cfc2SHong Zhang 
1294*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1295*d71ae5a4SJacob Faibussowitsch {
12965f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
12975f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
12982726fb6dSPierre Jolivet     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
12992726fb6dSPierre Jolivet 
13009566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->A));
13019566063dSJacob Faibussowitsch     PetscCall(MatConjugate(a->B));
13025f80ce2aSJacob Faibussowitsch   }
13032726fb6dSPierre Jolivet   PetscFunctionReturn(0);
13042726fb6dSPierre Jolivet }
13052726fb6dSPierre Jolivet 
1306*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1307*d71ae5a4SJacob Faibussowitsch {
130899cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
130999cafbc1SBarry Smith 
131099cafbc1SBarry Smith   PetscFunctionBegin;
13119566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
13129566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
131399cafbc1SBarry Smith   PetscFunctionReturn(0);
131499cafbc1SBarry Smith }
131599cafbc1SBarry Smith 
1316*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1317*d71ae5a4SJacob Faibussowitsch {
131899cafbc1SBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
131999cafbc1SBarry Smith 
132099cafbc1SBarry Smith   PetscFunctionBegin;
13219566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
13229566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
132399cafbc1SBarry Smith   PetscFunctionReturn(0);
132499cafbc1SBarry Smith }
132599cafbc1SBarry Smith 
13267dae84e0SHong Zhang /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
132736032a97SHong Zhang    Input: isrow       - distributed(parallel),
132836032a97SHong Zhang           iscol_local - locally owned (seq)
132936032a97SHong Zhang */
1330*d71ae5a4SJacob Faibussowitsch PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1331*d71ae5a4SJacob Faibussowitsch {
13328f46ffcaSHong Zhang   PetscInt        sz1, sz2, *a1, *a2, i, j, k, nmatch;
13338f46ffcaSHong Zhang   const PetscInt *ptr1, *ptr2;
133436032a97SHong Zhang 
133536032a97SHong Zhang   PetscFunctionBegin;
13369566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(isrow, &sz1));
13379566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol_local, &sz2));
13381098a8e8SHong Zhang   if (sz1 > sz2) {
13391098a8e8SHong Zhang     *flg = PETSC_FALSE;
13401098a8e8SHong Zhang     PetscFunctionReturn(0);
13411098a8e8SHong Zhang   }
13428f46ffcaSHong Zhang 
13439566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &ptr1));
13449566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(iscol_local, &ptr2));
13458f46ffcaSHong Zhang 
13469566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz1, &a1));
13479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(sz2, &a2));
13489566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a1, ptr1, sz1));
13499566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a2, ptr2, sz2));
13509566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz1, a1));
13519566063dSJacob Faibussowitsch   PetscCall(PetscSortInt(sz2, a2));
13528f46ffcaSHong Zhang 
13538f46ffcaSHong Zhang   nmatch = 0;
13548f46ffcaSHong Zhang   k      = 0;
13558f46ffcaSHong Zhang   for (i = 0; i < sz1; i++) {
13568f46ffcaSHong Zhang     for (j = k; j < sz2; j++) {
13578f46ffcaSHong Zhang       if (a1[i] == a2[j]) {
13589371c9d4SSatish Balay         k = j;
13599371c9d4SSatish Balay         nmatch++;
13608f46ffcaSHong Zhang         break;
13618f46ffcaSHong Zhang       }
13628f46ffcaSHong Zhang     }
13638f46ffcaSHong Zhang   }
13649566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &ptr1));
13659566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(iscol_local, &ptr2));
13669566063dSJacob Faibussowitsch   PetscCall(PetscFree(a1));
13679566063dSJacob Faibussowitsch   PetscCall(PetscFree(a2));
13681098a8e8SHong Zhang   if (nmatch < sz1) {
13691098a8e8SHong Zhang     *flg = PETSC_FALSE;
13701098a8e8SHong Zhang   } else {
13711098a8e8SHong Zhang     *flg = PETSC_TRUE;
13721098a8e8SHong Zhang   }
137336032a97SHong Zhang   PetscFunctionReturn(0);
13748f46ffcaSHong Zhang }
137536032a97SHong Zhang 
1376*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1377*d71ae5a4SJacob Faibussowitsch {
137836032a97SHong Zhang   IS        iscol_local;
137936032a97SHong Zhang   PetscInt  csize;
138036032a97SHong Zhang   PetscBool isequal;
138136032a97SHong Zhang 
138236032a97SHong Zhang   PetscFunctionBegin;
13839566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
138436032a97SHong Zhang   if (call == MAT_REUSE_MATRIX) {
13859566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
13865f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
138736032a97SHong Zhang   } else {
1388068661f9SToby Isaac     PetscBool issorted;
1389068661f9SToby Isaac 
13909566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
13919566063dSJacob Faibussowitsch     PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
13929566063dSJacob Faibussowitsch     PetscCall(ISSorted(iscol_local, &issorted));
13935f80ce2aSJacob Faibussowitsch     PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
13948f46ffcaSHong Zhang   }
13958f46ffcaSHong Zhang 
13967dae84e0SHong Zhang   /* now call MatCreateSubMatrix_MPIBAIJ() */
13979566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
13988f46ffcaSHong Zhang   if (call == MAT_INITIAL_MATRIX) {
13999566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
14009566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
14018f46ffcaSHong Zhang   }
14028f46ffcaSHong Zhang   PetscFunctionReturn(0);
14038f46ffcaSHong Zhang }
14048f46ffcaSHong Zhang 
1405*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1406*d71ae5a4SJacob Faibussowitsch {
1407a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1408a30f8f8cSSatish Balay 
1409a30f8f8cSSatish Balay   PetscFunctionBegin;
14109566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
14119566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
1412a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1413a30f8f8cSSatish Balay }
1414a30f8f8cSSatish Balay 
1415*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1416*d71ae5a4SJacob Faibussowitsch {
1417a30f8f8cSSatish Balay   Mat_MPISBAIJ  *a = (Mat_MPISBAIJ *)matin->data;
1418a30f8f8cSSatish Balay   Mat            A = a->A, B = a->B;
14193966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
1420a30f8f8cSSatish Balay 
1421a30f8f8cSSatish Balay   PetscFunctionBegin;
1422d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
142326fbe8dcSKarl Rupp 
14249566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
142526fbe8dcSKarl Rupp 
14269371c9d4SSatish Balay   isend[0] = info->nz_used;
14279371c9d4SSatish Balay   isend[1] = info->nz_allocated;
14289371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
14299371c9d4SSatish Balay   isend[3] = info->memory;
14309371c9d4SSatish Balay   isend[4] = info->mallocs;
143126fbe8dcSKarl Rupp 
14329566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
143326fbe8dcSKarl Rupp 
14349371c9d4SSatish Balay   isend[0] += info->nz_used;
14359371c9d4SSatish Balay   isend[1] += info->nz_allocated;
14369371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14379371c9d4SSatish Balay   isend[3] += info->memory;
14389371c9d4SSatish Balay   isend[4] += info->mallocs;
1439a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1440a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1441a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1442a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1443a30f8f8cSSatish Balay     info->memory       = isend[3];
1444a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1445a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14461c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
144726fbe8dcSKarl Rupp 
1448a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1449a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1450a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1451a30f8f8cSSatish Balay     info->memory       = irecv[3];
1452a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1453a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14541c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
145526fbe8dcSKarl Rupp 
1456a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1457a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1458a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1459a30f8f8cSSatish Balay     info->memory       = irecv[3];
1460a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
146198921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1462a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1464a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1465a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1466a30f8f8cSSatish Balay }
1467a30f8f8cSSatish Balay 
1468*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1469*d71ae5a4SJacob Faibussowitsch {
1470a30f8f8cSSatish Balay   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ *)A->data;
1471d0d4cfc2SHong Zhang   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1472a30f8f8cSSatish Balay 
1473a30f8f8cSSatish Balay   PetscFunctionBegin;
1474e98b92d7SKris Buschelman   switch (op) {
1475512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
1476e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
147728b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
1479c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
1480e98b92d7SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
148143674050SBarry Smith     MatCheckPreallocated(A, 1);
14829566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14839566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1484e98b92d7SKris Buschelman     break;
1485e98b92d7SKris Buschelman   case MAT_ROW_ORIENTED:
148643674050SBarry Smith     MatCheckPreallocated(A, 1);
14874e0d8c25SBarry Smith     a->roworiented = flg;
148826fbe8dcSKarl Rupp 
14899566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14909566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
1491e98b92d7SKris Buschelman     break;
14928c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
1493*d71ae5a4SJacob Faibussowitsch   case MAT_SORTED_FULL:
1494*d71ae5a4SJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1495*d71ae5a4SJacob Faibussowitsch     break;
1496*d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497*d71ae5a4SJacob Faibussowitsch     a->donotstash = flg;
1498*d71ae5a4SJacob Faibussowitsch     break;
1499*d71ae5a4SJacob Faibussowitsch   case MAT_USE_HASH_TABLE:
1500*d71ae5a4SJacob Faibussowitsch     a->ht_flag = flg;
1501*d71ae5a4SJacob Faibussowitsch     break;
1502*d71ae5a4SJacob Faibussowitsch   case MAT_HERMITIAN:
1503*d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1504*d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
15050f2140c7SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1506eb1ec7c1SStefano Zampini     if (flg) { /* need different mat-vec ops */
1507547795f9SHong Zhang       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509eb1ec7c1SStefano Zampini       A->ops->multtranspose    = NULL;
1510eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = NULL;
1511b94d7dedSBarry Smith       A->symmetric             = PETSC_BOOL3_FALSE;
1512eb1ec7c1SStefano Zampini     }
15130f2140c7SStefano Zampini #endif
1514eeffb40dSHong Zhang     break;
1515ffa07934SHong Zhang   case MAT_SPD:
1516*d71ae5a4SJacob Faibussowitsch   case MAT_SYMMETRIC:
1517*d71ae5a4SJacob Faibussowitsch     MatCheckPreallocated(A, 1);
1518*d71ae5a4SJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1519eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
1520eb1ec7c1SStefano Zampini     if (flg) { /* restore to use default mat-vec ops */
1521eb1ec7c1SStefano Zampini       A->ops->mult             = MatMult_MPISBAIJ;
1522eb1ec7c1SStefano Zampini       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523eb1ec7c1SStefano Zampini       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524eb1ec7c1SStefano Zampini       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525eb1ec7c1SStefano Zampini     }
1526eb1ec7c1SStefano Zampini #endif
1527eeffb40dSHong Zhang     break;
152877e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
152943674050SBarry Smith     MatCheckPreallocated(A, 1);
15309566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
1531eeffb40dSHong Zhang     break;
15329a4540c5SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1533b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
15345f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
15359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
153677e54ba9SKris Buschelman     break;
1537*d71ae5a4SJacob Faibussowitsch   case MAT_SPD_ETERNAL:
1538*d71ae5a4SJacob Faibussowitsch     break;
1539*d71ae5a4SJacob Faibussowitsch   case MAT_IGNORE_LOWER_TRIANGULAR:
1540*d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1541*d71ae5a4SJacob Faibussowitsch     break;
1542*d71ae5a4SJacob Faibussowitsch   case MAT_ERROR_LOWER_TRIANGULAR:
1543*d71ae5a4SJacob Faibussowitsch     aA->ignore_ltriangular = flg;
1544*d71ae5a4SJacob Faibussowitsch     break;
1545*d71ae5a4SJacob Faibussowitsch   case MAT_GETROW_UPPERTRIANGULAR:
1546*d71ae5a4SJacob Faibussowitsch     aA->getrow_utriangular = flg;
1547*d71ae5a4SJacob Faibussowitsch     break;
1548*d71ae5a4SJacob Faibussowitsch   default:
1549*d71ae5a4SJacob Faibussowitsch     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1550a30f8f8cSSatish Balay   }
1551a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1552a30f8f8cSSatish Balay }
1553a30f8f8cSSatish Balay 
1554*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1555*d71ae5a4SJacob Faibussowitsch {
1556a30f8f8cSSatish Balay   PetscFunctionBegin;
15577fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1558cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX) {
15599566063dSJacob Faibussowitsch     PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1560cf37664fSBarry Smith   } else if (reuse == MAT_REUSE_MATRIX) {
15619566063dSJacob Faibussowitsch     PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1562fc4dec0aSBarry Smith   }
15638115998fSBarry Smith   PetscFunctionReturn(0);
1564a30f8f8cSSatish Balay }
1565a30f8f8cSSatish Balay 
1566*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1567*d71ae5a4SJacob Faibussowitsch {
1568a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1569a30f8f8cSSatish Balay   Mat           a = baij->A, b = baij->B;
15705e90f9d9SHong Zhang   PetscInt      nv, m, n;
1571ace3abfcSBarry Smith   PetscBool     flg;
1572a30f8f8cSSatish Balay 
1573a30f8f8cSSatish Balay   PetscFunctionBegin;
1574a30f8f8cSSatish Balay   if (ll != rr) {
15759566063dSJacob Faibussowitsch     PetscCall(VecEqual(ll, rr, &flg));
15765f80ce2aSJacob Faibussowitsch     PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1577a30f8f8cSSatish Balay   }
1578b3bf805bSHong Zhang   if (!ll) PetscFunctionReturn(0);
1579b3bf805bSHong Zhang 
15809566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
15815f80ce2aSJacob Faibussowitsch   PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1582b3bf805bSHong Zhang 
15839566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(rr, &nv));
15845f80ce2aSJacob Faibussowitsch   PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
15855e90f9d9SHong Zhang 
15869566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
15875e90f9d9SHong Zhang 
15885e90f9d9SHong Zhang   /* left diagonalscale the off-diagonal part */
1589dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, ll, NULL);
15905e90f9d9SHong Zhang 
15915e90f9d9SHong Zhang   /* scale the diagonal part */
1592dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
1593a30f8f8cSSatish Balay 
15945e90f9d9SHong Zhang   /* right diagonalscale the off-diagonal part */
15959566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1596dbbe0bcdSBarry Smith   PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1597a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1598a30f8f8cSSatish Balay }
1599a30f8f8cSSatish Balay 
1600*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601*d71ae5a4SJacob Faibussowitsch {
1602f3566a2aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1603a30f8f8cSSatish Balay 
1604a30f8f8cSSatish Balay   PetscFunctionBegin;
16059566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
1606a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1607a30f8f8cSSatish Balay }
1608a30f8f8cSSatish Balay 
16096849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1610a30f8f8cSSatish Balay 
1611*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1612*d71ae5a4SJacob Faibussowitsch {
1613a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1614a30f8f8cSSatish Balay   Mat           a, b, c, d;
1615ace3abfcSBarry Smith   PetscBool     flg;
1616a30f8f8cSSatish Balay 
1617a30f8f8cSSatish Balay   PetscFunctionBegin;
16189371c9d4SSatish Balay   a = matA->A;
16199371c9d4SSatish Balay   b = matA->B;
16209371c9d4SSatish Balay   c = matB->A;
16219371c9d4SSatish Balay   d = matB->B;
1622a30f8f8cSSatish Balay 
16239566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
162448a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
16251c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1626a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1627a30f8f8cSSatish Balay }
1628a30f8f8cSSatish Balay 
1629*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1630*d71ae5a4SJacob Faibussowitsch {
16314c7a3774SStefano Zampini   PetscBool isbaij;
16323c896bc6SHong Zhang 
16333c896bc6SHong Zhang   PetscFunctionBegin;
16349566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
16355f80ce2aSJacob Faibussowitsch   PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
16363c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
16373c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
16389566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(A));
16399566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
16409566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(A));
16413c896bc6SHong Zhang   } else {
16424c7a3774SStefano Zampini     Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
16434c7a3774SStefano Zampini     Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
16444c7a3774SStefano Zampini 
16459566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
16469566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
16473c896bc6SHong Zhang   }
16489566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
16493c896bc6SHong Zhang   PetscFunctionReturn(0);
16503c896bc6SHong Zhang }
16513c896bc6SHong Zhang 
1652*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1653*d71ae5a4SJacob Faibussowitsch {
1654273d9f13SBarry Smith   PetscFunctionBegin;
16559566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1656273d9f13SBarry Smith   PetscFunctionReturn(0);
1657273d9f13SBarry Smith }
1658a5e6ed63SBarry Smith 
1659*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1660*d71ae5a4SJacob Faibussowitsch {
16614fe895cdSHong Zhang   Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
16624fe895cdSHong Zhang   PetscBLASInt  bnz, one                          = 1;
16634fe895cdSHong Zhang   Mat_SeqSBAIJ *xa, *ya;
16644fe895cdSHong Zhang   Mat_SeqBAIJ  *xb, *yb;
16654fe895cdSHong Zhang 
16664fe895cdSHong Zhang   PetscFunctionBegin;
16674fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
16684fe895cdSHong Zhang     PetscScalar alpha = a;
16694fe895cdSHong Zhang     xa                = (Mat_SeqSBAIJ *)xx->A->data;
16704fe895cdSHong Zhang     ya                = (Mat_SeqSBAIJ *)yy->A->data;
16719566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1672792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
16734fe895cdSHong Zhang     xb = (Mat_SeqBAIJ *)xx->B->data;
16744fe895cdSHong Zhang     yb = (Mat_SeqBAIJ *)yy->B->data;
16759566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1676792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
16779566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1678ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
16799566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
16809566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
16819566063dSJacob Faibussowitsch     PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
16824fe895cdSHong Zhang   } else {
16834de5dceeSHong Zhang     Mat       B;
16844de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
16855f80ce2aSJacob Faibussowitsch     PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
16869566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(X));
16879566063dSJacob Faibussowitsch     PetscCall(MatGetRowUpperTriangular(Y));
16889566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
16899566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
16909566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
16919566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
16929566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
16939566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
16949566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPISBAIJ));
16959566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
16969566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
16979566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
16989566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
16999566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
17009566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
17019566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
17029566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(X));
17039566063dSJacob Faibussowitsch     PetscCall(MatRestoreRowUpperTriangular(Y));
17044fe895cdSHong Zhang   }
17054fe895cdSHong Zhang   PetscFunctionReturn(0);
17064fe895cdSHong Zhang }
17074fe895cdSHong Zhang 
1708*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1709*d71ae5a4SJacob Faibussowitsch {
17101302d50aSBarry Smith   PetscInt  i;
1711afebec48SHong Zhang   PetscBool flg;
1712a5e6ed63SBarry Smith 
17136849ba73SBarry Smith   PetscFunctionBegin;
17149566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1715a5e6ed63SBarry Smith   for (i = 0; i < n; i++) {
17169566063dSJacob Faibussowitsch     PetscCall(ISEqual(irow[i], icol[i], &flg));
171748a46eb9SPierre Jolivet     if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
17184dcd73b1SHong Zhang   }
1719a5e6ed63SBarry Smith   PetscFunctionReturn(0);
1720a5e6ed63SBarry Smith }
1721a5e6ed63SBarry Smith 
1722*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1723*d71ae5a4SJacob Faibussowitsch {
17247d68702bSBarry Smith   Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
17256f33a894SBarry Smith   Mat_SeqSBAIJ *aij  = (Mat_SeqSBAIJ *)maij->A->data;
17267d68702bSBarry Smith 
17277d68702bSBarry Smith   PetscFunctionBegin;
17286f33a894SBarry Smith   if (!Y->preallocated) {
17299566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
17306f33a894SBarry Smith   } else if (!aij->nz) {
1731b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
17329566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1733b83222d8SBarry Smith     aij->nonew = nonew;
17347d68702bSBarry Smith   }
17359566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
17367d68702bSBarry Smith   PetscFunctionReturn(0);
17377d68702bSBarry Smith }
17387d68702bSBarry Smith 
1739*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1740*d71ae5a4SJacob Faibussowitsch {
17413b49f96aSBarry Smith   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
17423b49f96aSBarry Smith 
17433b49f96aSBarry Smith   PetscFunctionBegin;
17445f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
17459566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
17463b49f96aSBarry Smith   if (d) {
17473b49f96aSBarry Smith     PetscInt rstart;
17489566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
17493b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
17503b49f96aSBarry Smith   }
17513b49f96aSBarry Smith   PetscFunctionReturn(0);
17523b49f96aSBarry Smith }
17533b49f96aSBarry Smith 
1754*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1755*d71ae5a4SJacob Faibussowitsch {
1756a5b7ff6bSBarry Smith   PetscFunctionBegin;
1757a5b7ff6bSBarry Smith   *a = ((Mat_MPISBAIJ *)A->data)->A;
1758a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
1759a5b7ff6bSBarry Smith }
17603b49f96aSBarry Smith 
1761a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
17623964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1763a30f8f8cSSatish Balay                                        MatGetRow_MPISBAIJ,
1764a30f8f8cSSatish Balay                                        MatRestoreRow_MPISBAIJ,
1765a9d4b620SHong Zhang                                        MatMult_MPISBAIJ,
176697304618SKris Buschelman                                        /*  4*/ MatMultAdd_MPISBAIJ,
1767431c96f7SBarry Smith                                        MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1768431c96f7SBarry Smith                                        MatMultAdd_MPISBAIJ,
1769f4259b30SLisandro Dalcin                                        NULL,
1770f4259b30SLisandro Dalcin                                        NULL,
1771f4259b30SLisandro Dalcin                                        NULL,
1772f4259b30SLisandro Dalcin                                        /* 10*/ NULL,
1773f4259b30SLisandro Dalcin                                        NULL,
1774f4259b30SLisandro Dalcin                                        NULL,
177541f059aeSBarry Smith                                        MatSOR_MPISBAIJ,
1776a30f8f8cSSatish Balay                                        MatTranspose_MPISBAIJ,
177797304618SKris Buschelman                                        /* 15*/ MatGetInfo_MPISBAIJ,
1778a30f8f8cSSatish Balay                                        MatEqual_MPISBAIJ,
1779a30f8f8cSSatish Balay                                        MatGetDiagonal_MPISBAIJ,
1780a30f8f8cSSatish Balay                                        MatDiagonalScale_MPISBAIJ,
1781a30f8f8cSSatish Balay                                        MatNorm_MPISBAIJ,
178297304618SKris Buschelman                                        /* 20*/ MatAssemblyBegin_MPISBAIJ,
1783a30f8f8cSSatish Balay                                        MatAssemblyEnd_MPISBAIJ,
1784a30f8f8cSSatish Balay                                        MatSetOption_MPISBAIJ,
1785a30f8f8cSSatish Balay                                        MatZeroEntries_MPISBAIJ,
1786f4259b30SLisandro Dalcin                                        /* 24*/ NULL,
1787f4259b30SLisandro Dalcin                                        NULL,
1788f4259b30SLisandro Dalcin                                        NULL,
1789f4259b30SLisandro Dalcin                                        NULL,
1790f4259b30SLisandro Dalcin                                        NULL,
17914994cf47SJed Brown                                        /* 29*/ MatSetUp_MPISBAIJ,
1792f4259b30SLisandro Dalcin                                        NULL,
1793f4259b30SLisandro Dalcin                                        NULL,
1794a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPISBAIJ,
1795f4259b30SLisandro Dalcin                                        NULL,
1796d519adbfSMatthew Knepley                                        /* 34*/ MatDuplicate_MPISBAIJ,
1797f4259b30SLisandro Dalcin                                        NULL,
1798f4259b30SLisandro Dalcin                                        NULL,
1799f4259b30SLisandro Dalcin                                        NULL,
1800f4259b30SLisandro Dalcin                                        NULL,
1801d519adbfSMatthew Knepley                                        /* 39*/ MatAXPY_MPISBAIJ,
18027dae84e0SHong Zhang                                        MatCreateSubMatrices_MPISBAIJ,
1803d94109b8SHong Zhang                                        MatIncreaseOverlap_MPISBAIJ,
1804a30f8f8cSSatish Balay                                        MatGetValues_MPISBAIJ,
18053c896bc6SHong Zhang                                        MatCopy_MPISBAIJ,
1806f4259b30SLisandro Dalcin                                        /* 44*/ NULL,
1807a30f8f8cSSatish Balay                                        MatScale_MPISBAIJ,
18087d68702bSBarry Smith                                        MatShift_MPISBAIJ,
1809f4259b30SLisandro Dalcin                                        NULL,
1810f4259b30SLisandro Dalcin                                        NULL,
1811f4259b30SLisandro Dalcin                                        /* 49*/ NULL,
1812f4259b30SLisandro Dalcin                                        NULL,
1813f4259b30SLisandro Dalcin                                        NULL,
1814f4259b30SLisandro Dalcin                                        NULL,
1815f4259b30SLisandro Dalcin                                        NULL,
1816f4259b30SLisandro Dalcin                                        /* 54*/ NULL,
1817f4259b30SLisandro Dalcin                                        NULL,
1818a30f8f8cSSatish Balay                                        MatSetUnfactored_MPISBAIJ,
1819f4259b30SLisandro Dalcin                                        NULL,
1820a30f8f8cSSatish Balay                                        MatSetValuesBlocked_MPISBAIJ,
18217dae84e0SHong Zhang                                        /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1822f4259b30SLisandro Dalcin                                        NULL,
1823f4259b30SLisandro Dalcin                                        NULL,
1824f4259b30SLisandro Dalcin                                        NULL,
1825f4259b30SLisandro Dalcin                                        NULL,
1826f4259b30SLisandro Dalcin                                        /* 64*/ NULL,
1827f4259b30SLisandro Dalcin                                        NULL,
1828f4259b30SLisandro Dalcin                                        NULL,
1829f4259b30SLisandro Dalcin                                        NULL,
1830f4259b30SLisandro Dalcin                                        NULL,
1831d519adbfSMatthew Knepley                                        /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1832f4259b30SLisandro Dalcin                                        NULL,
183328d58a37SPierre Jolivet                                        MatConvert_MPISBAIJ_Basic,
1834f4259b30SLisandro Dalcin                                        NULL,
1835f4259b30SLisandro Dalcin                                        NULL,
1836f4259b30SLisandro Dalcin                                        /* 74*/ NULL,
1837f4259b30SLisandro Dalcin                                        NULL,
1838f4259b30SLisandro Dalcin                                        NULL,
1839f4259b30SLisandro Dalcin                                        NULL,
1840f4259b30SLisandro Dalcin                                        NULL,
1841f4259b30SLisandro Dalcin                                        /* 79*/ NULL,
1842f4259b30SLisandro Dalcin                                        NULL,
1843f4259b30SLisandro Dalcin                                        NULL,
1844f4259b30SLisandro Dalcin                                        NULL,
18455bba2384SShri Abhyankar                                        MatLoad_MPISBAIJ,
1846f4259b30SLisandro Dalcin                                        /* 84*/ NULL,
1847f4259b30SLisandro Dalcin                                        NULL,
1848f4259b30SLisandro Dalcin                                        NULL,
1849f4259b30SLisandro Dalcin                                        NULL,
1850f4259b30SLisandro Dalcin                                        NULL,
1851f4259b30SLisandro Dalcin                                        /* 89*/ NULL,
1852f4259b30SLisandro Dalcin                                        NULL,
1853f4259b30SLisandro Dalcin                                        NULL,
1854f4259b30SLisandro Dalcin                                        NULL,
1855f4259b30SLisandro Dalcin                                        NULL,
1856f4259b30SLisandro Dalcin                                        /* 94*/ NULL,
1857f4259b30SLisandro Dalcin                                        NULL,
1858f4259b30SLisandro Dalcin                                        NULL,
1859f4259b30SLisandro Dalcin                                        NULL,
1860f4259b30SLisandro Dalcin                                        NULL,
1861f4259b30SLisandro Dalcin                                        /* 99*/ NULL,
1862f4259b30SLisandro Dalcin                                        NULL,
1863f4259b30SLisandro Dalcin                                        NULL,
18642726fb6dSPierre Jolivet                                        MatConjugate_MPISBAIJ,
1865f4259b30SLisandro Dalcin                                        NULL,
1866f4259b30SLisandro Dalcin                                        /*104*/ NULL,
186799cafbc1SBarry Smith                                        MatRealPart_MPISBAIJ,
1868d0d4cfc2SHong Zhang                                        MatImaginaryPart_MPISBAIJ,
1869d0d4cfc2SHong Zhang                                        MatGetRowUpperTriangular_MPISBAIJ,
187095936485SShri Abhyankar                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1871f4259b30SLisandro Dalcin                                        /*109*/ NULL,
1872f4259b30SLisandro Dalcin                                        NULL,
1873f4259b30SLisandro Dalcin                                        NULL,
1874f4259b30SLisandro Dalcin                                        NULL,
18753b49f96aSBarry Smith                                        MatMissingDiagonal_MPISBAIJ,
1876f4259b30SLisandro Dalcin                                        /*114*/ NULL,
1877f4259b30SLisandro Dalcin                                        NULL,
1878f4259b30SLisandro Dalcin                                        NULL,
1879f4259b30SLisandro Dalcin                                        NULL,
1880f4259b30SLisandro Dalcin                                        NULL,
1881f4259b30SLisandro Dalcin                                        /*119*/ NULL,
1882f4259b30SLisandro Dalcin                                        NULL,
1883f4259b30SLisandro Dalcin                                        NULL,
1884f4259b30SLisandro Dalcin                                        NULL,
1885f4259b30SLisandro Dalcin                                        NULL,
1886f4259b30SLisandro Dalcin                                        /*124*/ NULL,
1887f4259b30SLisandro Dalcin                                        NULL,
1888f4259b30SLisandro Dalcin                                        NULL,
1889f4259b30SLisandro Dalcin                                        NULL,
1890f4259b30SLisandro Dalcin                                        NULL,
1891f4259b30SLisandro Dalcin                                        /*129*/ NULL,
1892f4259b30SLisandro Dalcin                                        NULL,
1893f4259b30SLisandro Dalcin                                        NULL,
1894f4259b30SLisandro Dalcin                                        NULL,
1895f4259b30SLisandro Dalcin                                        NULL,
1896f4259b30SLisandro Dalcin                                        /*134*/ NULL,
1897f4259b30SLisandro Dalcin                                        NULL,
1898f4259b30SLisandro Dalcin                                        NULL,
1899f4259b30SLisandro Dalcin                                        NULL,
1900f4259b30SLisandro Dalcin                                        NULL,
190146533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
1902f4259b30SLisandro Dalcin                                        NULL,
1903f4259b30SLisandro Dalcin                                        NULL,
1904f4259b30SLisandro Dalcin                                        NULL,
1905f4259b30SLisandro Dalcin                                        NULL,
1906d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1907d70f29a3SPierre Jolivet                                        NULL,
1908d70f29a3SPierre Jolivet                                        NULL,
190999a7f59eSMark Adams                                        NULL,
191099a7f59eSMark Adams                                        NULL,
19117fb60732SBarry Smith                                        NULL,
19129371c9d4SSatish Balay                                        /*150*/ NULL};
1913a30f8f8cSSatish Balay 
1914*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1915*d71ae5a4SJacob Faibussowitsch {
1916476417e5SBarry Smith   Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1917535b19f3SBarry Smith   PetscInt      i, mbs, Mbs;
19185d2a9ed1SStefano Zampini   PetscMPIInt   size;
1919a23d5eceSKris Buschelman 
1920a23d5eceSKris Buschelman   PetscFunctionBegin;
19219566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
19229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19239566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19249566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
19255f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
19265f80ce2aSJacob Faibussowitsch   PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1927899cda47SBarry Smith 
1928d0f46423SBarry Smith   mbs = B->rmap->n / bs;
1929d0f46423SBarry Smith   Mbs = B->rmap->N / bs;
19305f80ce2aSJacob Faibussowitsch   PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->N, bs);
1931a23d5eceSKris Buschelman 
1932d0f46423SBarry Smith   B->rmap->bs = bs;
1933a23d5eceSKris Buschelman   b->bs2      = bs * bs;
1934a23d5eceSKris Buschelman   b->mbs      = mbs;
1935a23d5eceSKris Buschelman   b->Mbs      = Mbs;
1936de64b629SHong Zhang   b->nbs      = B->cmap->n / bs;
1937de64b629SHong Zhang   b->Nbs      = B->cmap->N / bs;
1938a23d5eceSKris Buschelman 
1939ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1940d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
1941d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
1942a23d5eceSKris Buschelman 
1943d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
1944d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
1945a23d5eceSKris Buschelman 
1946cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
19479566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
1948cb7b82ddSBarry Smith #else
19499566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
1950cb7b82ddSBarry Smith #endif
19519566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
19529566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
19539566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
19549566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0));
19559566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec0b));
19569566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1));
19579566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1a));
19589566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->slvec1b));
19599566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->sMvctx));
1960cb7b82ddSBarry Smith 
1961cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
19629566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
19639566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
19649566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
19659566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
19669566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
1967cb7b82ddSBarry Smith 
1968526dfc15SBarry Smith   if (!B->preallocated) {
19699566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
19709566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
19719566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQSBAIJ));
19729566063dSJacob Faibussowitsch     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1973526dfc15SBarry Smith   }
1974a23d5eceSKris Buschelman 
19759566063dSJacob Faibussowitsch   PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
19769566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
197726fbe8dcSKarl Rupp 
1978526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
1979cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
1980cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
1981a23d5eceSKris Buschelman   PetscFunctionReturn(0);
1982a23d5eceSKris Buschelman }
1983a23d5eceSKris Buschelman 
1984*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1985*d71ae5a4SJacob Faibussowitsch {
198602106b30SBarry Smith   PetscInt        m, rstart, cend;
1987f4259b30SLisandro Dalcin   PetscInt        i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1988f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
1989f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
1990bb80cfbbSStefano Zampini   PetscBool       roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
19913bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
1992dfb205c3SBarry Smith 
1993dfb205c3SBarry Smith   PetscFunctionBegin;
19945f80ce2aSJacob Faibussowitsch   PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
19959566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
19969566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
19979566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
19989566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
19999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2000dfb205c3SBarry Smith   m      = B->rmap->n / bs;
2001dfb205c3SBarry Smith   rstart = B->rmap->rstart / bs;
2002dfb205c3SBarry Smith   cend   = B->cmap->rend / bs;
2003dfb205c3SBarry Smith 
20045f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
20059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2006dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2007dfb205c3SBarry Smith     nz = ii[i + 1] - ii[i];
20085f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
20090cd7f59aSBarry Smith     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2010dfb205c3SBarry Smith     JJ = jj + ii[i];
20110cd7f59aSBarry Smith     bd = 0;
2012dfb205c3SBarry Smith     for (j = 0; j < nz; j++) {
20130cd7f59aSBarry Smith       if (*JJ >= i + rstart) break;
2014dfb205c3SBarry Smith       JJ++;
20150cd7f59aSBarry Smith       bd++;
2016dfb205c3SBarry Smith     }
2017dfb205c3SBarry Smith     d = 0;
2018dfb205c3SBarry Smith     for (; j < nz; j++) {
2019dfb205c3SBarry Smith       if (*JJ++ >= cend) break;
2020dfb205c3SBarry Smith       d++;
2021dfb205c3SBarry Smith     }
2022dfb205c3SBarry Smith     d_nnz[i] = d;
20230cd7f59aSBarry Smith     o_nnz[i] = nz - d - bd;
20240cd7f59aSBarry Smith     nz       = nz - bd;
20250cd7f59aSBarry Smith     nz_max   = PetscMax(nz_max, nz);
2026dfb205c3SBarry Smith   }
20279566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
20289566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
20299566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2030dfb205c3SBarry Smith 
2031dfb205c3SBarry Smith   values = (PetscScalar *)V;
203248a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2033dfb205c3SBarry Smith   for (i = 0; i < m; i++) {
2034dfb205c3SBarry Smith     PetscInt        row   = i + rstart;
2035dfb205c3SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2036dfb205c3SBarry Smith     const PetscInt *icols = jj + ii[i];
2037bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2038dfb205c3SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
20399566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2040bb80cfbbSStefano Zampini     } else { /* block ordering does not match so we can only insert one block at a time. */
2041bb80cfbbSStefano Zampini       PetscInt j;
20420cd7f59aSBarry Smith       for (j = 0; j < ncols; j++) {
20430cd7f59aSBarry Smith         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
20449566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
20450cd7f59aSBarry Smith       }
20460cd7f59aSBarry Smith     }
2047dfb205c3SBarry Smith   }
2048dfb205c3SBarry Smith 
20499566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
20503bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
20513bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
20529566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
20539566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
20543bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
20553bd0feecSPierre Jolivet 
20569566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2057dfb205c3SBarry Smith   PetscFunctionReturn(0);
2058dfb205c3SBarry Smith }
2059dfb205c3SBarry Smith 
20600bad9183SKris Buschelman /*MC
2061fafad747SKris Buschelman    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062828413b8SBarry Smith    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063828413b8SBarry Smith    the matrix is stored.
2064828413b8SBarry Smith 
2065828413b8SBarry Smith    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
206611a5261eSBarry Smith    can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
20670bad9183SKris Buschelman 
20680bad9183SKris Buschelman    Options Database Keys:
206911a5261eSBarry Smith . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
20700bad9183SKris Buschelman 
207111a5261eSBarry Smith    Note:
2072476417e5SBarry Smith      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2073476417e5SBarry Smith      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2074476417e5SBarry Smith 
20750bad9183SKris Buschelman    Level: beginner
20760bad9183SKris Buschelman 
207711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
20780bad9183SKris Buschelman M*/
20790bad9183SKris Buschelman 
2080*d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2081*d71ae5a4SJacob Faibussowitsch {
2082b5df2d14SHong Zhang   Mat_MPISBAIJ *b;
208394ae4db5SBarry Smith   PetscBool     flg = PETSC_FALSE;
2084b5df2d14SHong Zhang 
2085b5df2d14SHong Zhang   PetscFunctionBegin;
20864dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
2087b0a32e0cSBarry Smith   B->data = (void *)b;
20889566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2089b5df2d14SHong Zhang 
2090b5df2d14SHong Zhang   B->ops->destroy = MatDestroy_MPISBAIJ;
2091b5df2d14SHong Zhang   B->ops->view    = MatView_MPISBAIJ;
2092b5df2d14SHong Zhang   B->assembled    = PETSC_FALSE;
2093b5df2d14SHong Zhang   B->insertmode   = NOT_SET_VALUES;
209426fbe8dcSKarl Rupp 
20959566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
20969566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2097b5df2d14SHong Zhang 
2098b5df2d14SHong Zhang   /* build local table of row and column ownerships */
20999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2100b5df2d14SHong Zhang 
2101b5df2d14SHong Zhang   /* build cache for off array entries formed */
21029566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
210326fbe8dcSKarl Rupp 
2104b5df2d14SHong Zhang   b->donotstash  = PETSC_FALSE;
21050298fd71SBarry Smith   b->colmap      = NULL;
21060298fd71SBarry Smith   b->garray      = NULL;
2107b5df2d14SHong Zhang   b->roworiented = PETSC_TRUE;
2108b5df2d14SHong Zhang 
2109b5df2d14SHong Zhang   /* stuff used in block assembly */
2110f4259b30SLisandro Dalcin   b->barray = NULL;
2111b5df2d14SHong Zhang 
2112b5df2d14SHong Zhang   /* stuff used for matrix vector multiply */
2113f4259b30SLisandro Dalcin   b->lvec    = NULL;
2114f4259b30SLisandro Dalcin   b->Mvctx   = NULL;
2115f4259b30SLisandro Dalcin   b->slvec0  = NULL;
2116f4259b30SLisandro Dalcin   b->slvec0b = NULL;
2117f4259b30SLisandro Dalcin   b->slvec1  = NULL;
2118f4259b30SLisandro Dalcin   b->slvec1a = NULL;
2119f4259b30SLisandro Dalcin   b->slvec1b = NULL;
2120f4259b30SLisandro Dalcin   b->sMvctx  = NULL;
2121b5df2d14SHong Zhang 
2122b5df2d14SHong Zhang   /* stuff for MatGetRow() */
2123f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2124f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2125b5df2d14SHong Zhang   b->getrowactive = PETSC_FALSE;
2126b5df2d14SHong Zhang 
2127b5df2d14SHong Zhang   /* hash table stuff */
2128f4259b30SLisandro Dalcin   b->ht           = NULL;
2129f4259b30SLisandro Dalcin   b->hd           = NULL;
2130b5df2d14SHong Zhang   b->ht_size      = 0;
2131b5df2d14SHong Zhang   b->ht_flag      = PETSC_FALSE;
2132b5df2d14SHong Zhang   b->ht_fact      = 0;
2133b5df2d14SHong Zhang   b->ht_total_ct  = 0;
2134b5df2d14SHong Zhang   b->ht_insert_ct = 0;
2135b5df2d14SHong Zhang 
21367dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
21377a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
21387a868f3eSHong Zhang 
2139f4259b30SLisandro Dalcin   b->in_loc = NULL;
2140f4259b30SLisandro Dalcin   b->v_loc  = NULL;
214159ffdab8SBarry Smith   b->n_loc  = 0;
214294ae4db5SBarry Smith 
21439566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
21449566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
21459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
21469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
21476214f412SHong Zhang #if defined(PETSC_HAVE_ELEMENTAL)
21489566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
21496214f412SHong Zhang #endif
2150d24d4204SJose E. Roman #if defined(PETSC_HAVE_SCALAPACK)
21519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2152d24d4204SJose E. Roman #endif
21539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
21549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2155aa5a9175SDahai Guo 
2156b94d7dedSBarry Smith   B->symmetric                   = PETSC_BOOL3_TRUE;
2157b94d7dedSBarry Smith   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2158b94d7dedSBarry Smith   B->symmetry_eternal            = PETSC_TRUE;
2159b94d7dedSBarry Smith   B->structural_symmetry_eternal = PETSC_TRUE;
2160eb1ec7c1SStefano Zampini #if defined(PETSC_USE_COMPLEX)
2161b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_FALSE;
2162eb1ec7c1SStefano Zampini #else
2163b94d7dedSBarry Smith   B->hermitian = PETSC_BOOL3_TRUE;
2164eb1ec7c1SStefano Zampini #endif
216513647f61SHong Zhang 
21669566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2167d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
21689566063dSJacob Faibussowitsch   PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
216994ae4db5SBarry Smith   if (flg) {
217094ae4db5SBarry Smith     PetscReal fact = 1.39;
21719566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
21729566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
217394ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
21749566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
21759566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
217694ae4db5SBarry Smith   }
2177d0609cedSBarry Smith   PetscOptionsEnd();
2178b5df2d14SHong Zhang   PetscFunctionReturn(0);
2179b5df2d14SHong Zhang }
2180b5df2d14SHong Zhang 
2181209238afSKris Buschelman /*MC
2182002d173eSKris Buschelman    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2183209238afSKris Buschelman 
218411a5261eSBarry Smith    This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
218511a5261eSBarry Smith    and `MATMPISBAIJ` otherwise.
2186209238afSKris Buschelman 
218711a5261eSBarry Smith    Options Database Key:
2188209238afSKris Buschelman . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2189209238afSKris Buschelman 
2190209238afSKris Buschelman   Level: beginner
2191209238afSKris Buschelman 
219211a5261eSBarry Smith .seealso: `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2193209238afSKris Buschelman M*/
2194209238afSKris Buschelman 
2195b5df2d14SHong Zhang /*@C
2196b5df2d14SHong Zhang    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2197b5df2d14SHong Zhang    the user should preallocate the matrix storage by setting the parameters
2198b5df2d14SHong Zhang    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2199b5df2d14SHong Zhang    performance can be increased by more than a factor of 50.
2200b5df2d14SHong Zhang 
220111a5261eSBarry Smith    Collective on B
2202b5df2d14SHong Zhang 
2203b5df2d14SHong Zhang    Input Parameters:
22041c4f3114SJed Brown +  B - the matrix
2205bb7ae925SBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2206bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2207b5df2d14SHong Zhang .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2208b5df2d14SHong Zhang            submatrix  (same for all local rows)
2209b5df2d14SHong Zhang .  d_nnz - array containing the number of block nonzeros in the various block rows
22106d10fdaeSSatish Balay            in the upper triangular and diagonal part of the in diagonal portion of the local
22110298fd71SBarry Smith            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
221295742e49SBarry Smith            for the diagonal entry and set a value even if it is zero.
2213b5df2d14SHong Zhang .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2214b5df2d14SHong Zhang            submatrix (same for all local rows).
2215b5df2d14SHong Zhang -  o_nnz - array containing the number of nonzeros in the various block rows of the
2216c2fc9fa9SBarry Smith            off-diagonal portion of the local submatrix that is right of the diagonal
22170298fd71SBarry Smith            (possibly different for each block row) or NULL.
2218b5df2d14SHong Zhang 
2219b5df2d14SHong Zhang    Options Database Keys:
2220a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2221b5df2d14SHong Zhang                      block calculations (much slower)
2222a2b725a8SWilliam Gropp -   -mat_block_size - size of the blocks to use
2223b5df2d14SHong Zhang 
2224b5df2d14SHong Zhang    Notes:
2225b5df2d14SHong Zhang 
222611a5261eSBarry Smith    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2227b5df2d14SHong Zhang    than it must be used on all processors that share the object for that argument.
2228b5df2d14SHong Zhang 
222949a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
223049a6f317SBarry Smith 
2231b5df2d14SHong Zhang    Storage Information:
2232b5df2d14SHong Zhang    For a square global matrix we define each processor's diagonal portion
2233b5df2d14SHong Zhang    to be its local rows and the corresponding columns (a square submatrix);
2234b5df2d14SHong Zhang    each processor's off-diagonal portion encompasses the remainder of the
2235b5df2d14SHong Zhang    local matrix (a rectangular submatrix).
2236b5df2d14SHong Zhang 
2237b5df2d14SHong Zhang    The user can specify preallocated storage for the diagonal part of
2238b5df2d14SHong Zhang    the local submatrix with either d_nz or d_nnz (not both).  Set
223911a5261eSBarry Smith    d_nz = `PETSC_DEFAULT` and d_nnz = NULL for PETSc to control dynamic
2240b5df2d14SHong Zhang    memory allocation.  Likewise, specify preallocated storage for the
2241b5df2d14SHong Zhang    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2242b5df2d14SHong Zhang 
224311a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2244aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2245aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2246aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2247aa95bbe8SBarry Smith 
2248b5df2d14SHong Zhang    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2249b5df2d14SHong Zhang    the figure below we depict these three local rows and all columns (0-11).
2250b5df2d14SHong Zhang 
2251b5df2d14SHong Zhang .vb
2252b5df2d14SHong Zhang            0 1 2 3 4 5 6 7 8 9 10 11
2253a4b1a0f6SJed Brown           --------------------------
2254c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2255c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2256c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2257a4b1a0f6SJed Brown           --------------------------
2258b5df2d14SHong Zhang .ve
2259b5df2d14SHong Zhang 
2260b5df2d14SHong Zhang    Thus, any entries in the d locations are stored in the d (diagonal)
2261b5df2d14SHong Zhang    submatrix, and any entries in the o locations are stored in the
22626d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
226311a5261eSBarry Smith    `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2264b5df2d14SHong Zhang 
22656d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
22666d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2267c2fc9fa9SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix
2268c2fc9fa9SBarry Smith 
2269b5df2d14SHong Zhang    In general, for PDE problems in which most nonzeros are near the diagonal,
2270b5df2d14SHong Zhang    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2271b5df2d14SHong Zhang    or you will get TERRIBLE performance; see the users' manual chapter on
2272b5df2d14SHong Zhang    matrices.
2273b5df2d14SHong Zhang 
2274b5df2d14SHong Zhang    Level: intermediate
2275b5df2d14SHong Zhang 
227611a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2277b5df2d14SHong Zhang @*/
2278*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2279*d71ae5a4SJacob Faibussowitsch {
2280b5df2d14SHong Zhang   PetscFunctionBegin;
22816ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
22826ba663aaSJed Brown   PetscValidType(B, 1);
22836ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2284cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2285b5df2d14SHong Zhang   PetscFunctionReturn(0);
2286b5df2d14SHong Zhang }
2287b5df2d14SHong Zhang 
2288a30f8f8cSSatish Balay /*@C
228911a5261eSBarry Smith    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2290a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2291a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2292a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2293a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2294a30f8f8cSSatish Balay 
2295d083f849SBarry Smith    Collective
2296a30f8f8cSSatish Balay 
2297a30f8f8cSSatish Balay    Input Parameters:
2298a30f8f8cSSatish Balay +  comm - MPI communicator
229911a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
2300bb7ae925SBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
230111a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
2302a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2303a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
230411a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
2305a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2306a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
230711a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
230811a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2309a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2310a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2311a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
23126d10fdaeSSatish Balay            in the upper triangular portion of the in diagonal portion of the local
23130298fd71SBarry Smith            (possibly different for each block block row) or NULL.
231495742e49SBarry Smith            If you plan to factor the matrix you must leave room for the diagonal entry and
231595742e49SBarry Smith            set its value even if it is zero.
2316a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2317a30f8f8cSSatish Balay            submatrix (same for all local rows).
2318a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2319a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
23200298fd71SBarry Smith            each block row) or NULL.
2321a30f8f8cSSatish Balay 
2322a30f8f8cSSatish Balay    Output Parameter:
2323a30f8f8cSSatish Balay .  A - the matrix
2324a30f8f8cSSatish Balay 
2325a30f8f8cSSatish Balay    Options Database Keys:
2326a2b725a8SWilliam Gropp +   -mat_no_unroll - uses code that does not unroll the loops in the
2327a30f8f8cSSatish Balay                      block calculations (much slower)
2328a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2329a2b725a8SWilliam Gropp -   -mat_mpi - use the parallel matrix data structures even on one processor
2330a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2331a30f8f8cSSatish Balay 
233211a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2333f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
233411a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2335175b88e8SBarry Smith 
2336a30f8f8cSSatish Balay    Notes:
2337d1be2dadSMatthew Knepley    The number of rows and columns must be divisible by blocksize.
23386d6d819aSHong Zhang    This matrix type does not support complex Hermitian operation.
2339d1be2dadSMatthew Knepley 
2340a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2341a30f8f8cSSatish Balay    (possibly both).
2342a30f8f8cSSatish Balay 
234311a5261eSBarry Smith    If` PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
2344a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2345a30f8f8cSSatish Balay 
234649a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
234749a6f317SBarry Smith 
2348a30f8f8cSSatish Balay    Storage Information:
2349a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2350a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2351a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2352a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2353a30f8f8cSSatish Balay 
2354a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2355a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
23560298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2357a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2358a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2359a30f8f8cSSatish Balay 
2360a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2362a30f8f8cSSatish Balay 
2363a30f8f8cSSatish Balay .vb
2364a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2365a4b1a0f6SJed Brown           --------------------------
2366c2fc9fa9SBarry Smith    row 3  |. . . d d d o o o o  o  o
2367c2fc9fa9SBarry Smith    row 4  |. . . d d d o o o o  o  o
2368c2fc9fa9SBarry Smith    row 5  |. . . d d d o o o o  o  o
2369a4b1a0f6SJed Brown           --------------------------
2370a30f8f8cSSatish Balay .ve
2371a30f8f8cSSatish Balay 
2372a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2373a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
23746d10fdaeSSatish Balay    o (off-diagonal) submatrix.  Note that the d matrix is stored in
237511a5261eSBarry Smith    MatSeqSBAIJ format and the o submatrix in `MATSEQBAIJ` format.
2376a30f8f8cSSatish Balay 
23776d10fdaeSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
23786d10fdaeSSatish Balay    plus the diagonal part of the d matrix,
2379a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2380a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2381a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2382a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2383a30f8f8cSSatish Balay    matrices.
2384a30f8f8cSSatish Balay 
2385a30f8f8cSSatish Balay    Level: intermediate
2386a30f8f8cSSatish Balay 
238711a5261eSBarry Smith .seealso: `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2388a30f8f8cSSatish Balay @*/
2389a30f8f8cSSatish Balay 
2390*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2391*d71ae5a4SJacob Faibussowitsch {
23921302d50aSBarry Smith   PetscMPIInt size;
2393a30f8f8cSSatish Balay 
2394a30f8f8cSSatish Balay   PetscFunctionBegin;
23959566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
23969566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
23979566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
2398273d9f13SBarry Smith   if (size > 1) {
23999566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPISBAIJ));
24009566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2401273d9f13SBarry Smith   } else {
24029566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQSBAIJ));
24039566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2404273d9f13SBarry Smith   }
2405a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2406a30f8f8cSSatish Balay }
2407a30f8f8cSSatish Balay 
2408*d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2409*d71ae5a4SJacob Faibussowitsch {
2410a30f8f8cSSatish Balay   Mat           mat;
2411a30f8f8cSSatish Balay   Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2412d0f46423SBarry Smith   PetscInt      len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2413387bc808SHong Zhang   PetscScalar  *array;
2414a30f8f8cSSatish Balay 
2415a30f8f8cSSatish Balay   PetscFunctionBegin;
2416f4259b30SLisandro Dalcin   *newmat = NULL;
241726fbe8dcSKarl Rupp 
24189566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
24199566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
24209566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
24219566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
24229566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2423e1b6402fSHong Zhang 
2424d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
2425273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
242682327fa8SHong Zhang   mat->assembled    = PETSC_TRUE;
24277fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
24287fff6886SHong Zhang 
2429b5df2d14SHong Zhang   a      = (Mat_MPISBAIJ *)mat->data;
2430a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2431a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2432a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2433a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2434a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2435a30f8f8cSSatish Balay 
2436a30f8f8cSSatish Balay   a->size         = oldmat->size;
2437a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2438a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2439a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2440f4259b30SLisandro Dalcin   a->rowindices   = NULL;
2441f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
2442a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2443f4259b30SLisandro Dalcin   a->barray       = NULL;
2444899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
2445899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
2446899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
2447899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
2448a30f8f8cSSatish Balay 
2449a30f8f8cSSatish Balay   /* hash table stuff */
2450f4259b30SLisandro Dalcin   a->ht           = NULL;
2451f4259b30SLisandro Dalcin   a->hd           = NULL;
2452a30f8f8cSSatish Balay   a->ht_size      = 0;
2453a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2454a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2455a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2456a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2457a30f8f8cSSatish Balay 
24589566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2459a30f8f8cSSatish Balay   if (oldmat->colmap) {
2460a30f8f8cSSatish Balay #if defined(PETSC_USE_CTABLE)
24619566063dSJacob Faibussowitsch     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
2462a30f8f8cSSatish Balay #else
24639566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
24649566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2465a30f8f8cSSatish Balay #endif
2466f4259b30SLisandro Dalcin   } else a->colmap = NULL;
2467387bc808SHong Zhang 
2468a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
24699566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
24709566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2471f4259b30SLisandro Dalcin   } else a->garray = NULL;
2472a30f8f8cSSatish Balay 
24739566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
24749566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
24759566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
247682327fa8SHong Zhang 
24779566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
24789566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2479387bc808SHong Zhang 
24809566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(a->slvec1, &nt));
24819566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec1, &array));
24829566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
24839566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
24849566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec1, &array));
24859566063dSJacob Faibussowitsch   PetscCall(VecGetArray(a->slvec0, &array));
24869566063dSJacob Faibussowitsch   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
24879566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(a->slvec0, &array));
2488387bc808SHong Zhang 
2489387bc808SHong Zhang   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
24909566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2491387bc808SHong Zhang   a->sMvctx = oldmat->sMvctx;
249282327fa8SHong Zhang 
24939566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
24949566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
24959566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2496a30f8f8cSSatish Balay   *newmat = mat;
2497a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2498a30f8f8cSSatish Balay }
2499a30f8f8cSSatish Balay 
2500618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
2501618cc2edSLisandro Dalcin #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2502618cc2edSLisandro Dalcin 
2503*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2504*d71ae5a4SJacob Faibussowitsch {
25057f489da9SVaclav Hapla   PetscBool isbinary;
250695936485SShri Abhyankar 
250795936485SShri Abhyankar   PetscFunctionBegin;
25089566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
25095f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
25109566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
251195936485SShri Abhyankar   PetscFunctionReturn(0);
251295936485SShri Abhyankar }
251395936485SShri Abhyankar 
2514*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2515*d71ae5a4SJacob Faibussowitsch {
251624d5174aSHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2517f4c0e9e4SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ *)(a->B)->data;
2518ca54ac64SHong Zhang   PetscReal     atmp;
251987828ca2SBarry Smith   PetscReal    *work, *svalues, *rvalues;
25201302d50aSBarry Smith   PetscInt      i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
25211302d50aSBarry Smith   PetscMPIInt   rank, size;
25221302d50aSBarry Smith   PetscInt     *rowners_bs, dest, count, source;
252387828ca2SBarry Smith   PetscScalar  *va;
25248a1c53f2SBarry Smith   MatScalar    *ba;
2525f4c0e9e4SHong Zhang   MPI_Status    stat;
252624d5174aSHong Zhang 
252724d5174aSHong Zhang   PetscFunctionBegin;
25285f80ce2aSJacob Faibussowitsch   PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
25299566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
25309566063dSJacob Faibussowitsch   PetscCall(VecGetArray(v, &va));
2531f4c0e9e4SHong Zhang 
25329566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
25339566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2534f4c0e9e4SHong Zhang 
2535d0f46423SBarry Smith   bs  = A->rmap->bs;
2536f4c0e9e4SHong Zhang   mbs = a->mbs;
2537f4c0e9e4SHong Zhang   Mbs = a->Mbs;
2538f4c0e9e4SHong Zhang   ba  = b->a;
2539f4c0e9e4SHong Zhang   bi  = b->i;
2540f4c0e9e4SHong Zhang   bj  = b->j;
2541f4c0e9e4SHong Zhang 
2542f4c0e9e4SHong Zhang   /* find ownerships */
2543d0f46423SBarry Smith   rowners_bs = A->rmap->range;
2544f4c0e9e4SHong Zhang 
2545f4c0e9e4SHong Zhang   /* each proc creates an array to be distributed */
25469566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(bs * Mbs, &work));
2547f4c0e9e4SHong Zhang 
2548f4c0e9e4SHong Zhang   /* row_max for B */
2549b8475685SHong Zhang   if (rank != size - 1) {
2550f4c0e9e4SHong Zhang     for (i = 0; i < mbs; i++) {
25519371c9d4SSatish Balay       ncols = bi[1] - bi[0];
25529371c9d4SSatish Balay       bi++;
2553f4c0e9e4SHong Zhang       brow = bs * i;
2554f4c0e9e4SHong Zhang       for (j = 0; j < ncols; j++) {
2555f4c0e9e4SHong Zhang         bcol = bs * (*bj);
2556f4c0e9e4SHong Zhang         for (kcol = 0; kcol < bs; kcol++) {
2557ca54ac64SHong Zhang           col = bcol + kcol;           /* local col index */
255804d41228SHong Zhang           col += rowners_bs[rank + 1]; /* global col index */
2559f4c0e9e4SHong Zhang           for (krow = 0; krow < bs; krow++) {
25609371c9d4SSatish Balay             atmp = PetscAbsScalar(*ba);
25619371c9d4SSatish Balay             ba++;
2562ca54ac64SHong Zhang             row = brow + krow; /* local row index */
2563ca54ac64SHong Zhang             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2564f4c0e9e4SHong Zhang             if (work[col] < atmp) work[col] = atmp;
2565f4c0e9e4SHong Zhang           }
2566f4c0e9e4SHong Zhang         }
2567f4c0e9e4SHong Zhang         bj++;
2568f4c0e9e4SHong Zhang       }
2569f4c0e9e4SHong Zhang     }
2570f4c0e9e4SHong Zhang 
2571f4c0e9e4SHong Zhang     /* send values to its owners */
2572f4c0e9e4SHong Zhang     for (dest = rank + 1; dest < size; dest++) {
2573f4c0e9e4SHong Zhang       svalues = work + rowners_bs[dest];
2574ca54ac64SHong Zhang       count   = rowners_bs[dest + 1] - rowners_bs[dest];
25759566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2576ca54ac64SHong Zhang     }
2577f4c0e9e4SHong Zhang   }
2578f4c0e9e4SHong Zhang 
2579f4c0e9e4SHong Zhang   /* receive values */
2580ca54ac64SHong Zhang   if (rank) {
2581f4c0e9e4SHong Zhang     rvalues = work;
2582ca54ac64SHong Zhang     count   = rowners_bs[rank + 1] - rowners_bs[rank];
2583f4c0e9e4SHong Zhang     for (source = 0; source < rank; source++) {
25849566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2585f4c0e9e4SHong Zhang       /* process values */
2586f4c0e9e4SHong Zhang       for (i = 0; i < count; i++) {
2587ca54ac64SHong Zhang         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2588f4c0e9e4SHong Zhang       }
2589f4c0e9e4SHong Zhang     }
2590ca54ac64SHong Zhang   }
2591f4c0e9e4SHong Zhang 
25929566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(v, &va));
25939566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
259424d5174aSHong Zhang   PetscFunctionReturn(0);
259524d5174aSHong Zhang }
25962798e883SHong Zhang 
2597*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2598*d71ae5a4SJacob Faibussowitsch {
25992798e883SHong Zhang   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)matin->data;
2600d0f46423SBarry Smith   PetscInt           mbs = mat->mbs, bs = matin->rmap->bs;
26013649974fSBarry Smith   PetscScalar       *x, *ptr, *from;
2602ffe4fb16SHong Zhang   Vec                bb1;
26033649974fSBarry Smith   const PetscScalar *b;
2604ffe4fb16SHong Zhang 
2605ffe4fb16SHong Zhang   PetscFunctionBegin;
26065f80ce2aSJacob Faibussowitsch   PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
26075f80ce2aSJacob Faibussowitsch   PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2608ffe4fb16SHong Zhang 
2609a2b30743SBarry Smith   if (flag == SOR_APPLY_UPPER) {
26109566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2611a2b30743SBarry Smith     PetscFunctionReturn(0);
2612a2b30743SBarry Smith   }
2613a2b30743SBarry Smith 
2614ffe4fb16SHong Zhang   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2615ffe4fb16SHong Zhang     if (flag & SOR_ZERO_INITIAL_GUESS) {
26169566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2617ffe4fb16SHong Zhang       its--;
2618ffe4fb16SHong Zhang     }
2619ffe4fb16SHong Zhang 
26209566063dSJacob Faibussowitsch     PetscCall(VecDuplicate(bb, &bb1));
2621ffe4fb16SHong Zhang     while (its--) {
2622ffe4fb16SHong Zhang       /* lower triangular part: slvec0b = - B^T*xx */
26239566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2624ffe4fb16SHong Zhang 
2625ffe4fb16SHong Zhang       /* copy xx into slvec0a */
26269566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec0, &ptr));
26279566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26289566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, x, bs * mbs));
26299566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2630ffe4fb16SHong Zhang 
26319566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->slvec0, -1.0));
2632ffe4fb16SHong Zhang 
2633ffe4fb16SHong Zhang       /* copy bb into slvec1a */
26349566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1, &ptr));
26359566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26369566063dSJacob Faibussowitsch       PetscCall(PetscArraycpy(ptr, b, bs * mbs));
26379566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2638ffe4fb16SHong Zhang 
2639ffe4fb16SHong Zhang       /* set slvec1b = 0 */
26409566063dSJacob Faibussowitsch       PetscCall(VecSet(mat->slvec1b, 0.0));
2641ffe4fb16SHong Zhang 
26429566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
26439566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
26449566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
26459566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2646ffe4fb16SHong Zhang 
2647ffe4fb16SHong Zhang       /* upper triangular part: bb1 = bb1 - B*x */
26489566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2649ffe4fb16SHong Zhang 
2650ffe4fb16SHong Zhang       /* local diagonal sweep */
26519566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2652ffe4fb16SHong Zhang     }
26539566063dSJacob Faibussowitsch     PetscCall(VecDestroy(&bb1));
2654fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26559566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2656fa22f6d0SBarry Smith   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
26579566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2658fa22f6d0SBarry Smith   } else if (flag & SOR_EISENSTAT) {
2659fa22f6d0SBarry Smith     Vec                xx1;
2660ace3abfcSBarry Smith     PetscBool          hasop;
266120f1ed55SBarry Smith     const PetscScalar *diag;
2662887ee2caSBarry Smith     PetscScalar       *sl, scale = (omega - 2.0) / omega;
266320f1ed55SBarry Smith     PetscInt           i, n;
2664fa22f6d0SBarry Smith 
2665fa22f6d0SBarry Smith     if (!mat->xx1) {
26669566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->xx1));
26679566063dSJacob Faibussowitsch       PetscCall(VecDuplicate(bb, &mat->bb1));
2668fa22f6d0SBarry Smith     }
2669fa22f6d0SBarry Smith     xx1 = mat->xx1;
2670fa22f6d0SBarry Smith     bb1 = mat->bb1;
2671fa22f6d0SBarry Smith 
26729566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2673fa22f6d0SBarry Smith 
2674fa22f6d0SBarry Smith     if (!mat->diag) {
2675effcda25SBarry Smith       /* this is wrong for same matrix with new nonzero values */
26769566063dSJacob Faibussowitsch       PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
26779566063dSJacob Faibussowitsch       PetscCall(MatGetDiagonal(matin, mat->diag));
2678fa22f6d0SBarry Smith     }
26799566063dSJacob Faibussowitsch     PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2680fa22f6d0SBarry Smith 
2681fa22f6d0SBarry Smith     if (hasop) {
26829566063dSJacob Faibussowitsch       PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
26839566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a, scale, bb));
268420f1ed55SBarry Smith     } else {
268520f1ed55SBarry Smith       /*
268620f1ed55SBarry Smith           These two lines are replaced by code that may be a bit faster for a good compiler
26879566063dSJacob Faibussowitsch       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
26889566063dSJacob Faibussowitsch       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
268920f1ed55SBarry Smith       */
26909566063dSJacob Faibussowitsch       PetscCall(VecGetArray(mat->slvec1a, &sl));
26919566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(mat->diag, &diag));
26929566063dSJacob Faibussowitsch       PetscCall(VecGetArrayRead(bb, &b));
26939566063dSJacob Faibussowitsch       PetscCall(VecGetArray(xx, &x));
26949566063dSJacob Faibussowitsch       PetscCall(VecGetLocalSize(xx, &n));
2695887ee2caSBarry Smith       if (omega == 1.0) {
269626fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
26979566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(2.0 * n));
2698887ee2caSBarry Smith       } else {
269926fbe8dcSKarl Rupp         for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
27009566063dSJacob Faibussowitsch         PetscCall(PetscLogFlops(3.0 * n));
2701887ee2caSBarry Smith       }
27029566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(mat->slvec1a, &sl));
27039566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(mat->diag, &diag));
27049566063dSJacob Faibussowitsch       PetscCall(VecRestoreArrayRead(bb, &b));
27059566063dSJacob Faibussowitsch       PetscCall(VecRestoreArray(xx, &x));
270620f1ed55SBarry Smith     }
2707fa22f6d0SBarry Smith 
2708fa22f6d0SBarry Smith     /* multiply off-diagonal portion of matrix */
27099566063dSJacob Faibussowitsch     PetscCall(VecSet(mat->slvec1b, 0.0));
27109566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
27119566063dSJacob Faibussowitsch     PetscCall(VecGetArray(mat->slvec0, &from));
27129566063dSJacob Faibussowitsch     PetscCall(VecGetArray(xx, &x));
27139566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(from, x, bs * mbs));
27149566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(mat->slvec0, &from));
27159566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(xx, &x));
27169566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27179566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
27189566063dSJacob Faibussowitsch     PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2719fa22f6d0SBarry Smith 
2720fa22f6d0SBarry Smith     /* local sweep */
27219566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
27229566063dSJacob Faibussowitsch     PetscCall(VecAXPY(xx, 1.0, xx1));
2723f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2724ffe4fb16SHong Zhang   PetscFunctionReturn(0);
2725ffe4fb16SHong Zhang }
2726ffe4fb16SHong Zhang 
2727dfb205c3SBarry Smith /*@
272811a5261eSBarry Smith      MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain in standard
2729dfb205c3SBarry Smith          CSR format the local rows.
2730dfb205c3SBarry Smith 
2731d083f849SBarry Smith    Collective
2732dfb205c3SBarry Smith 
2733dfb205c3SBarry Smith    Input Parameters:
2734dfb205c3SBarry Smith +  comm - MPI communicator
2735dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
273611a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
2737dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
273811a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
2739dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
274011a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
274111a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
2742483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2743dfb205c3SBarry Smith .   j - column indices
2744dfb205c3SBarry Smith -   a - matrix values
2745dfb205c3SBarry Smith 
2746dfb205c3SBarry Smith    Output Parameter:
2747dfb205c3SBarry Smith .   mat - the matrix
2748dfb205c3SBarry Smith 
2749dfb205c3SBarry Smith    Level: intermediate
2750dfb205c3SBarry Smith 
2751dfb205c3SBarry Smith    Notes:
2752dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2753dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2754dfb205c3SBarry Smith      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2755dfb205c3SBarry Smith 
2756dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2757dfb205c3SBarry Smith 
275811a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2759db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2760dfb205c3SBarry Smith @*/
2761*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2762*d71ae5a4SJacob Faibussowitsch {
2763dfb205c3SBarry Smith   PetscFunctionBegin;
27645f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
27655f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
27669566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
27679566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
27689566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPISBAIJ));
27699566063dSJacob Faibussowitsch   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2770dfb205c3SBarry Smith   PetscFunctionReturn(0);
2771dfb205c3SBarry Smith }
2772dfb205c3SBarry Smith 
2773dfb205c3SBarry Smith /*@C
277411a5261eSBarry Smith    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2775dfb205c3SBarry Smith 
2776d083f849SBarry Smith    Collective
2777dfb205c3SBarry Smith 
2778dfb205c3SBarry Smith    Input Parameters:
27791c4f3114SJed Brown +  B - the matrix
2780dfb205c3SBarry Smith .  bs - the block size
2781dfb205c3SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2782dfb205c3SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2783dfb205c3SBarry Smith -  v - optional values in the matrix
2784dfb205c3SBarry Smith 
2785664954b6SBarry Smith    Level: advanced
2786664954b6SBarry Smith 
2787664954b6SBarry Smith    Notes:
27880cd7f59aSBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
27890cd7f59aSBarry Smith    and usually the numerical values as well
27900cd7f59aSBarry Smith 
279150c5228eSBarry Smith    Any entries below the diagonal are ignored
2792dfb205c3SBarry Smith 
279311a5261eSBarry Smith .seealso: `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2794dfb205c3SBarry Smith @*/
2795*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2796*d71ae5a4SJacob Faibussowitsch {
2797dfb205c3SBarry Smith   PetscFunctionBegin;
2798cac4c232SBarry Smith   PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2799dfb205c3SBarry Smith   PetscFunctionReturn(0);
2800dfb205c3SBarry Smith }
2801dfb205c3SBarry Smith 
2802*d71ae5a4SJacob Faibussowitsch PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2803*d71ae5a4SJacob Faibussowitsch {
280410c56fdeSHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
280510c56fdeSHong Zhang   PetscInt    *indx;
280610c56fdeSHong Zhang   PetscScalar *values;
2807dfb205c3SBarry Smith 
28084dcd73b1SHong Zhang   PetscFunctionBegin;
28099566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
281010c56fdeSHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
281110c56fdeSHong Zhang     Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2812de25e9cbSPierre Jolivet     PetscInt     *dnz, *onz, mbs, Nbs, nbs;
281310c56fdeSHong Zhang     PetscInt     *bindx, rmax = a->rmax, j;
2814de25e9cbSPierre Jolivet     PetscMPIInt   rank, size;
28154dcd73b1SHong Zhang 
28169566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28179371c9d4SSatish Balay     mbs = m / bs;
28189371c9d4SSatish Balay     Nbs = N / cbs;
281948a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2820da91a574SPierre Jolivet     nbs = n / cbs;
28214dcd73b1SHong Zhang 
28229566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
2823d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2824de25e9cbSPierre Jolivet 
28259566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
28269566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
2827de25e9cbSPierre Jolivet     if (rank == size - 1) {
2828de25e9cbSPierre Jolivet       /* Check sum(nbs) = Nbs */
28295f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2830de25e9cbSPierre Jolivet     }
2831de25e9cbSPierre Jolivet 
2832d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
28339566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
283410c56fdeSHong Zhang     for (i = 0; i < mbs; i++) {
28359566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
28364dcd73b1SHong Zhang       nnz = nnz / bs;
28374dcd73b1SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
28389566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
28399566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
28404dcd73b1SHong Zhang     }
28419566063dSJacob Faibussowitsch     PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28429566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
28434dcd73b1SHong Zhang 
28449566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
28459566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
28469566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
28479566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATSBAIJ));
28489566063dSJacob Faibussowitsch     PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
28499566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2850d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
28514dcd73b1SHong Zhang   }
28524dcd73b1SHong Zhang 
285310c56fdeSHong Zhang   /* numeric phase */
28549566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
28559566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
28564dcd73b1SHong Zhang 
28579566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
28584dcd73b1SHong Zhang   for (i = 0; i < m; i++) {
28599566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28604dcd73b1SHong Zhang     Ii = i + rstart;
28619566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
28629566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
28634dcd73b1SHong Zhang   }
28649566063dSJacob Faibussowitsch   PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
28659566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
28669566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
28674dcd73b1SHong Zhang   PetscFunctionReturn(0);
28684dcd73b1SHong Zhang }
2869