xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4dfa11a44d5adf2389f1d3acbc8f3c1116dc6c3a)
1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I  "petscmat.h"  I*/
2c5d9258eSSatish Balay 
3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h>
4c6db04a5SJed Brown #include <petscblaslapack.h>
565a92638SMatthew G. Knepley #include <petscsf.h>
679bdfe76SSatish Balay 
77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
97ea3e4caSstefano_zampini #endif
107ea3e4caSstefano_zampini 
119371c9d4SSatish Balay PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) {
127843d17aSBarry Smith   Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
134e879edeSHong Zhang   PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
144e879edeSHong Zhang   PetscScalar       *va, *vv;
154e879edeSHong Zhang   Vec                vB, vA;
164e879edeSHong Zhang   const PetscScalar *vb;
177843d17aSBarry Smith 
187843d17aSBarry Smith   PetscFunctionBegin;
199566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
209566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->A, vA, idx));
214e879edeSHong Zhang 
229566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(vA, &va));
23985db425SBarry Smith   if (idx) {
244e879edeSHong Zhang     for (i = 0; i < m; i++) {
2526fbe8dcSKarl Rupp       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2626fbe8dcSKarl Rupp     }
27985db425SBarry Smith   }
287843d17aSBarry Smith 
299566063dSJacob Faibussowitsch   PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
309566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &idxb));
319566063dSJacob Faibussowitsch   PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));
327843d17aSBarry Smith 
339566063dSJacob Faibussowitsch   PetscCall(VecGetArrayWrite(v, &vv));
349566063dSJacob Faibussowitsch   PetscCall(VecGetArrayRead(vB, &vb));
354e879edeSHong Zhang   for (i = 0; i < m; i++) {
3626fbe8dcSKarl Rupp     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
374e879edeSHong Zhang       vv[i] = vb[i];
384e879edeSHong Zhang       if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
394e879edeSHong Zhang     } else {
404e879edeSHong Zhang       vv[i] = va[i];
419371c9d4SSatish Balay       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
4226fbe8dcSKarl Rupp     }
437843d17aSBarry Smith   }
449566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &vv));
459566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayWrite(vA, &va));
469566063dSJacob Faibussowitsch   PetscCall(VecRestoreArrayRead(vB, &vb));
479566063dSJacob Faibussowitsch   PetscCall(PetscFree(idxb));
489566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vA));
499566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&vB));
507843d17aSBarry Smith   PetscFunctionReturn(0);
517843d17aSBarry Smith }
527843d17aSBarry Smith 
539371c9d4SSatish Balay PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) {
547fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
557fc3c18eSBarry Smith 
567fc3c18eSBarry Smith   PetscFunctionBegin;
579566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->A));
589566063dSJacob Faibussowitsch   PetscCall(MatStoreValues(aij->B));
597fc3c18eSBarry Smith   PetscFunctionReturn(0);
607fc3c18eSBarry Smith }
617fc3c18eSBarry Smith 
629371c9d4SSatish Balay PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) {
637fc3c18eSBarry Smith   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
647fc3c18eSBarry Smith 
657fc3c18eSBarry Smith   PetscFunctionBegin;
669566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->A));
679566063dSJacob Faibussowitsch   PetscCall(MatRetrieveValues(aij->B));
687fc3c18eSBarry Smith   PetscFunctionReturn(0);
697fc3c18eSBarry Smith }
707fc3c18eSBarry Smith 
71537820f0SBarry Smith /*
72537820f0SBarry Smith      Local utility routine that creates a mapping from the global column
7357b952d6SSatish Balay    number to the local number in the off-diagonal part of the local
74e06f6af7SJed Brown    storage of the matrix.  This is done in a non scalable way since the
7557b952d6SSatish Balay    length of colmap equals the global matrix length.
7657b952d6SSatish Balay */
779371c9d4SSatish Balay PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) {
7857b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
7957b952d6SSatish Balay   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
80d0f46423SBarry Smith   PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;
8157b952d6SSatish Balay 
82d64ed03dSBarry Smith   PetscFunctionBegin;
83aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
849566063dSJacob Faibussowitsch   PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
8548a46eb9SPierre Jolivet   for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
8648e59246SSatish Balay #else
879566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
88928fc39bSSatish Balay   for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
8948e59246SSatish Balay #endif
903a40ed3dSBarry Smith   PetscFunctionReturn(0);
9157b952d6SSatish Balay }
9257b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_A_Private - Sets or adds one scalar at point (row, col) of the block
   described by the enclosing function's variables a, A, aa, ai, aj, aimax, ailen.

   row, col   - point indices into that block (split here into block index and offset by bs)
   value      - scalar to store
   addv       - ADD_VALUES accumulates; anything else overwrites
   orow, ocol - original indices as given by the caller, used only in the error message

   The macro relies on these variables of the enclosing scope: bs, bs2, brow, bcol, ridx, cidx,
   rp, ap, bap, rmax, nrow, low, high, t, _i, N. It also defines the label a_noinsert, so it can
   be expanded only once per function.

   The block row is bisected down to a window of at most 3 entries and then scanned linearly.
   If the block column is missing, a->nonew decides: 1 silently ignores the value, -1 raises an
   error, otherwise the row is grown through MatSeqXAIJReallocateAIJ() and a zeroed block is
   inserted at the sorted position. Inside a block, entry (ridx, cidx) sits at bs * cidx + ridx.
*/
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = (row) / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = (col) / bs; \
    ridx = (row) % bs; \
    cidx = (col) % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if ((addv) == ADD_VALUES) *bap += (value); \
        else *bap = (value); \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = (value); \
  a_noinsert:; \
    ailen[brow] = nrow; \
  }
13257b952d6SSatish Balay 
/*
   MatSetValues_SeqBAIJ_B_Private - Sets or adds one scalar at point (row, col) of the block
   described by the enclosing function's variables b, B, ba, bi, bj, bimax, bilen.

   row, col   - point indices into that block (split here into block index and offset by bs)
   value      - scalar to store
   addv       - ADD_VALUES accumulates; anything else overwrites
   orow, ocol - original indices as given by the caller, used only in the error message

   The macro relies on these variables of the enclosing scope: bs, bs2, brow, bcol, ridx, cidx,
   rp, ap, bap, rmax, nrow, low, high, t, _i, N. It also defines the label b_noinsert, so it can
   be expanded only once per function.

   The block row is bisected down to a window of at most 3 entries and then scanned linearly.
   If the block column is missing, b->nonew decides: 1 silently ignores the value, -1 raises an
   error, otherwise the row is grown through MatSeqXAIJReallocateAIJ() and a zeroed block is
   inserted at the sorted position. Inside a block, entry (ridx, cidx) sits at bs * cidx + ridx.
*/
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = (row) / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = (col) / bs; \
    ridx = (row) % bs; \
    cidx = (col) % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if ((addv) == ADD_VALUES) *bap += (value); \
        else *bap = (value); \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column  (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = (value); \
  b_noinsert:; \
    bilen[brow] = nrow; \
  }
172ac7a638eSSatish Balay 
1739371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
17457b952d6SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
17593fea6afSBarry Smith   MatScalar    value;
176ace3abfcSBarry Smith   PetscBool    roworiented = baij->roworiented;
177b24ad042SBarry Smith   PetscInt     i, j, row, col;
178d0f46423SBarry Smith   PetscInt     rstart_orig = mat->rmap->rstart;
179d0f46423SBarry Smith   PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
180d0f46423SBarry Smith   PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
18157b952d6SSatish Balay 
182eada6651SSatish Balay   /* Some Variables required in the macro */
18380c1aa95SSatish Balay   Mat          A     = baij->A;
18480c1aa95SSatish Balay   Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
185b24ad042SBarry Smith   PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
1863eda8832SBarry Smith   MatScalar   *aa = a->a;
187ac7a638eSSatish Balay 
188ac7a638eSSatish Balay   Mat          B     = baij->B;
189ac7a638eSSatish Balay   Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
190b24ad042SBarry Smith   PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
1913eda8832SBarry Smith   MatScalar   *ba = b->a;
192ac7a638eSSatish Balay 
193b24ad042SBarry Smith   PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
194b24ad042SBarry Smith   PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
1953eda8832SBarry Smith   MatScalar *ap, *bap;
19680c1aa95SSatish Balay 
197d64ed03dSBarry Smith   PetscFunctionBegin;
19857b952d6SSatish Balay   for (i = 0; i < m; i++) {
1995ef9f2a5SBarry Smith     if (im[i] < 0) continue;
2005f80ce2aSJacob Faibussowitsch     PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
20157b952d6SSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) {
20257b952d6SSatish Balay       row = im[i] - rstart_orig;
20357b952d6SSatish Balay       for (j = 0; j < n; j++) {
20457b952d6SSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig) {
20557b952d6SSatish Balay           col = in[j] - cstart_orig;
206db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
207db4deed7SKarl Rupp           else value = v[i + j * m];
208d40312a9SBarry Smith           MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
209f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
210f7d195e4SLawrence Mitchell           continue;
211f7d195e4SLawrence Mitchell         } else {
212f7d195e4SLawrence Mitchell           PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
21357b952d6SSatish Balay           if (mat->was_assembled) {
21448a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
215aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
2169566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
217bba1ac68SSatish Balay             col = col - 1;
21848e59246SSatish Balay #else
219bba1ac68SSatish Balay             col = baij->colmap[in[j] / bs] - 1;
22048e59246SSatish Balay #endif
221c9ef50b2SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
2229566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIBAIJ(mat));
2238295de27SSatish Balay               col   = in[j];
2249bf004c3SSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
2259bf004c3SSatish Balay               B     = baij->B;
2269bf004c3SSatish Balay               b     = (Mat_SeqBAIJ *)(B)->data;
2279371c9d4SSatish Balay               bimax = b->imax;
2289371c9d4SSatish Balay               bi    = b->i;
2299371c9d4SSatish Balay               bilen = b->ilen;
2309371c9d4SSatish Balay               bj    = b->j;
2319bf004c3SSatish Balay               ba    = b->a;
232f7d195e4SLawrence Mitchell             } else {
233f7d195e4SLawrence Mitchell               PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
234f7d195e4SLawrence Mitchell               col += in[j] % bs;
235f7d195e4SLawrence Mitchell             }
2368295de27SSatish Balay           } else col = in[j];
237db4deed7SKarl Rupp           if (roworiented) value = v[i * n + j];
238db4deed7SKarl Rupp           else value = v[i + j * m];
239d40312a9SBarry Smith           MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
2409566063dSJacob Faibussowitsch           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
24157b952d6SSatish Balay         }
24257b952d6SSatish Balay       }
243d64ed03dSBarry Smith     } else {
2445f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
24590f02eecSBarry Smith       if (!baij->donotstash) {
2465080c13bSMatthew G Knepley         mat->assembled = PETSC_FALSE;
247ff2fd236SBarry Smith         if (roworiented) {
2489566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
249ff2fd236SBarry Smith         } else {
2509566063dSJacob Faibussowitsch           PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
25157b952d6SSatish Balay         }
25257b952d6SSatish Balay       }
25357b952d6SSatish Balay     }
25490f02eecSBarry Smith   }
2553a40ed3dSBarry Smith   PetscFunctionReturn(0);
25657b952d6SSatish Balay }
25757b952d6SSatish Balay 
2589371c9d4SSatish Balay static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
259880c6e6aSBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
2608ab52850SBarry Smith   PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
261880c6e6aSBarry Smith   PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
2628ab52850SBarry Smith   PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
263880c6e6aSBarry Smith   PetscBool          roworiented = a->roworiented;
264880c6e6aSBarry Smith   const PetscScalar *value       = v;
265880c6e6aSBarry Smith   MatScalar         *ap, *aa = a->a, *bap;
266880c6e6aSBarry Smith 
267880c6e6aSBarry Smith   PetscFunctionBegin;
268880c6e6aSBarry Smith   rp    = aj + ai[row];
269880c6e6aSBarry Smith   ap    = aa + bs2 * ai[row];
270880c6e6aSBarry Smith   rmax  = imax[row];
271880c6e6aSBarry Smith   nrow  = ailen[row];
2728ab52850SBarry Smith   value = v;
2738ab52850SBarry Smith   low   = 0;
2748ab52850SBarry Smith   high  = nrow;
275880c6e6aSBarry Smith   while (high - low > 7) {
276880c6e6aSBarry Smith     t = (low + high) / 2;
277880c6e6aSBarry Smith     if (rp[t] > col) high = t;
278880c6e6aSBarry Smith     else low = t;
279880c6e6aSBarry Smith   }
280880c6e6aSBarry Smith   for (i = low; i < high; i++) {
281880c6e6aSBarry Smith     if (rp[i] > col) break;
282880c6e6aSBarry Smith     if (rp[i] == col) {
283880c6e6aSBarry Smith       bap = ap + bs2 * i;
284880c6e6aSBarry Smith       if (roworiented) {
285880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2868ab52850SBarry Smith           for (ii = 0; ii < bs; ii++) {
287ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
288880c6e6aSBarry Smith           }
289880c6e6aSBarry Smith         } else {
2908ab52850SBarry Smith           for (ii = 0; ii < bs; ii++) {
291ad540459SPierre Jolivet             for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
292880c6e6aSBarry Smith           }
293880c6e6aSBarry Smith         }
294880c6e6aSBarry Smith       } else {
295880c6e6aSBarry Smith         if (is == ADD_VALUES) {
2968ab52850SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
297ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
298880c6e6aSBarry Smith             bap += bs;
299880c6e6aSBarry Smith           }
300880c6e6aSBarry Smith         } else {
3018ab52850SBarry Smith           for (ii = 0; ii < bs; ii++, value += bs) {
302ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
303880c6e6aSBarry Smith             bap += bs;
304880c6e6aSBarry Smith           }
305880c6e6aSBarry Smith         }
306880c6e6aSBarry Smith       }
307880c6e6aSBarry Smith       goto noinsert2;
308880c6e6aSBarry Smith     }
309880c6e6aSBarry Smith   }
310880c6e6aSBarry Smith   if (nonew == 1) goto noinsert2;
3115f80ce2aSJacob Faibussowitsch   PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
312880c6e6aSBarry Smith   MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
3139371c9d4SSatish Balay   N = nrow++ - 1;
3149371c9d4SSatish Balay   high++;
315880c6e6aSBarry Smith   /* shift up all the later entries in this row */
3169566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
3179566063dSJacob Faibussowitsch   PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
318880c6e6aSBarry Smith   rp[i] = col;
319880c6e6aSBarry Smith   bap   = ap + bs2 * i;
320880c6e6aSBarry Smith   if (roworiented) {
3218ab52850SBarry Smith     for (ii = 0; ii < bs; ii++) {
322ad540459SPierre Jolivet       for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
323880c6e6aSBarry Smith     }
324880c6e6aSBarry Smith   } else {
3258ab52850SBarry Smith     for (ii = 0; ii < bs; ii++) {
326ad540459SPierre Jolivet       for (jj = 0; jj < bs; jj++) *bap++ = *value++;
327880c6e6aSBarry Smith     }
328880c6e6aSBarry Smith   }
329880c6e6aSBarry Smith noinsert2:;
330880c6e6aSBarry Smith   ailen[row] = nrow;
331880c6e6aSBarry Smith   PetscFunctionReturn(0);
332880c6e6aSBarry Smith }
333880c6e6aSBarry Smith 
3348ab52850SBarry Smith /*
3358ab52850SBarry Smith     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
3368ab52850SBarry Smith     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
3378ab52850SBarry Smith */
3389371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
339ab26458aSBarry Smith   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
340dd6ea824SBarry Smith   const PetscScalar *value;
341f15d580aSBarry Smith   MatScalar         *barray      = baij->barray;
342ace3abfcSBarry Smith   PetscBool          roworiented = baij->roworiented;
343899cda47SBarry Smith   PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
344899cda47SBarry Smith   PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
345d0f46423SBarry Smith   PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
346ab26458aSBarry Smith 
347b16ae2b1SBarry Smith   PetscFunctionBegin;
34830793edcSSatish Balay   if (!barray) {
3499566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
35082502324SSatish Balay     baij->barray = barray;
35130793edcSSatish Balay   }
35230793edcSSatish Balay 
35326fbe8dcSKarl Rupp   if (roworiented) stepval = (n - 1) * bs;
35426fbe8dcSKarl Rupp   else stepval = (m - 1) * bs;
35526fbe8dcSKarl Rupp 
356ab26458aSBarry Smith   for (i = 0; i < m; i++) {
3575ef9f2a5SBarry Smith     if (im[i] < 0) continue;
3586bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
359ab26458aSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
360ab26458aSBarry Smith       row = im[i] - rstart;
361ab26458aSBarry Smith       for (j = 0; j < n; j++) {
36215b57d14SSatish Balay         /* If NumCol = 1 then a copy is not required */
36315b57d14SSatish Balay         if ((roworiented) && (n == 1)) {
364f15d580aSBarry Smith           barray = (MatScalar *)v + i * bs2;
36515b57d14SSatish Balay         } else if ((!roworiented) && (m == 1)) {
366f15d580aSBarry Smith           barray = (MatScalar *)v + j * bs2;
36715b57d14SSatish Balay         } else { /* Here a copy is required */
368ab26458aSBarry Smith           if (roworiented) {
36953ef36baSBarry Smith             value = v + (i * (stepval + bs) + j) * bs;
370ab26458aSBarry Smith           } else {
37153ef36baSBarry Smith             value = v + (j * (stepval + bs) + i) * bs;
372abef11f7SSatish Balay           }
37353ef36baSBarry Smith           for (ii = 0; ii < bs; ii++, value += bs + stepval) {
37426fbe8dcSKarl Rupp             for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
37553ef36baSBarry Smith             barray += bs;
37647513183SBarry Smith           }
37730793edcSSatish Balay           barray -= bs2;
37815b57d14SSatish Balay         }
379abef11f7SSatish Balay 
380abef11f7SSatish Balay         if (in[j] >= cstart && in[j] < cend) {
381abef11f7SSatish Balay           col = in[j] - cstart;
3829566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
383f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
384f7d195e4SLawrence Mitchell           continue;
385f7d195e4SLawrence Mitchell         } else {
386f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
387ab26458aSBarry Smith           if (mat->was_assembled) {
38848a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
389a5eb4965SSatish Balay 
3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG)
391aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
3929371c9d4SSatish Balay             {
3939371c9d4SSatish Balay               PetscInt data;
3949566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
39508401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
396fa46199cSSatish Balay             }
39748e59246SSatish Balay #else
39808401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
399a5eb4965SSatish Balay #endif
40048e59246SSatish Balay #endif
401aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
4029566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
403fa46199cSSatish Balay             col = (col - 1) / bs;
40448e59246SSatish Balay #else
405a5eb4965SSatish Balay             col = (baij->colmap[in[j]] - 1) / bs;
40648e59246SSatish Balay #endif
4070e9bae81SBarry Smith             if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
4089566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIBAIJ(mat));
409ab26458aSBarry Smith               col = in[j];
4105f80ce2aSJacob Faibussowitsch             } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
411db4deed7SKarl Rupp           } else col = in[j];
4129566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
413ab26458aSBarry Smith         }
414ab26458aSBarry Smith       }
415d64ed03dSBarry Smith     } else {
4165f80ce2aSJacob Faibussowitsch       PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
417ab26458aSBarry Smith       if (!baij->donotstash) {
418ff2fd236SBarry Smith         if (roworiented) {
4199566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
420ff2fd236SBarry Smith         } else {
4219566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
422ff2fd236SBarry Smith         }
423abef11f7SSatish Balay       }
424ab26458aSBarry Smith     }
425ab26458aSBarry Smith   }
4263a40ed3dSBarry Smith   PetscFunctionReturn(0);
427ab26458aSBarry Smith }
4286fa18ffdSBarry Smith 
/*
   Multiplicative hashing: HASH(size, key, tmp) maps key to a slot in [0, size) by taking the
   fractional part of key * HASH_KEY and scaling it by size. tmp is a caller-supplied
   floating-point scratch variable that is overwritten. The result is equivalent to
   (PetscInt)(size * fmod(key * HASH_KEY, 1)) for non-negative keys.
*/
#define HASH_KEY             0.6180339887
#define HASH(size, key, tmp) ((tmp) = (key) * HASH_KEY, (PetscInt)((size) * ((tmp) - (PetscInt)(tmp))))
4339371c9d4SSatish Balay PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
4340bdbc534SSatish Balay   Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
435ace3abfcSBarry Smith   PetscBool    roworiented = baij->roworiented;
436b24ad042SBarry Smith   PetscInt     i, j, row, col;
437d0f46423SBarry Smith   PetscInt     rstart_orig = mat->rmap->rstart;
438d0f46423SBarry Smith   PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
439d0f46423SBarry Smith   PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
440329f5518SBarry Smith   PetscReal    tmp;
4413eda8832SBarry Smith   MatScalar  **HD       = baij->hd, value;
442b24ad042SBarry Smith   PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
4430bdbc534SSatish Balay 
4440bdbc534SSatish Balay   PetscFunctionBegin;
4450bdbc534SSatish Balay   for (i = 0; i < m; i++) {
44676bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
4475f80ce2aSJacob Faibussowitsch       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
4485f80ce2aSJacob Faibussowitsch       PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
44976bd3646SJed Brown     }
4500bdbc534SSatish Balay     row = im[i];
451c2760754SSatish Balay     if (row >= rstart_orig && row < rend_orig) {
4520bdbc534SSatish Balay       for (j = 0; j < n; j++) {
4530bdbc534SSatish Balay         col = in[j];
454db4deed7SKarl Rupp         if (roworiented) value = v[i * n + j];
455db4deed7SKarl Rupp         else value = v[i + j * m];
456b24ad042SBarry Smith         /* Look up PetscInto the Hash Table */
457c2760754SSatish Balay         key = (row / bs) * Nbs + (col / bs) + 1;
458c2760754SSatish Balay         h1  = HASH(size, key, tmp);
4590bdbc534SSatish Balay 
460c2760754SSatish Balay         idx = h1;
46176bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
462187ce0cbSSatish Balay           insert_ct++;
463187ce0cbSSatish Balay           total_ct++;
464187ce0cbSSatish Balay           if (HT[idx] != key) {
4659371c9d4SSatish Balay             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
4669371c9d4SSatish Balay               ;
467187ce0cbSSatish Balay             if (idx == size) {
4689371c9d4SSatish Balay               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
4699371c9d4SSatish Balay                 ;
4705f80ce2aSJacob Faibussowitsch               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
471187ce0cbSSatish Balay             }
472187ce0cbSSatish Balay           }
47376bd3646SJed Brown         } else if (HT[idx] != key) {
4749371c9d4SSatish Balay           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
4759371c9d4SSatish Balay             ;
476c2760754SSatish Balay           if (idx == size) {
4779371c9d4SSatish Balay             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
4789371c9d4SSatish Balay               ;
4795f80ce2aSJacob Faibussowitsch             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
480c2760754SSatish Balay           }
481c2760754SSatish Balay         }
482c2760754SSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
483c2760754SSatish Balay         if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
484c2760754SSatish Balay         else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
4850bdbc534SSatish Balay       }
48626fbe8dcSKarl Rupp     } else if (!baij->donotstash) {
487ff2fd236SBarry Smith       if (roworiented) {
4889566063dSJacob Faibussowitsch         PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
489ff2fd236SBarry Smith       } else {
4909566063dSJacob Faibussowitsch         PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
4910bdbc534SSatish Balay       }
4920bdbc534SSatish Balay     }
4930bdbc534SSatish Balay   }
49476bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
495abf3b562SBarry Smith     baij->ht_total_ct += total_ct;
496abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
49776bd3646SJed Brown   }
4980bdbc534SSatish Balay   PetscFunctionReturn(0);
4990bdbc534SSatish Balay }
5000bdbc534SSatish Balay 
5019371c9d4SSatish Balay PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
5020bdbc534SSatish Balay   Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
503ace3abfcSBarry Smith   PetscBool          roworiented = baij->roworiented;
504b24ad042SBarry Smith   PetscInt           i, j, ii, jj, row, col;
505899cda47SBarry Smith   PetscInt           rstart = baij->rstartbs;
506d0f46423SBarry Smith   PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
507b24ad042SBarry Smith   PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
508329f5518SBarry Smith   PetscReal          tmp;
5093eda8832SBarry Smith   MatScalar        **HD = baij->hd, *baij_a;
510dd6ea824SBarry Smith   const PetscScalar *v_t, *value;
511b24ad042SBarry Smith   PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;
5120bdbc534SSatish Balay 
513d0a41580SSatish Balay   PetscFunctionBegin;
51426fbe8dcSKarl Rupp   if (roworiented) stepval = (n - 1) * bs;
51526fbe8dcSKarl Rupp   else stepval = (m - 1) * bs;
51626fbe8dcSKarl Rupp 
5170bdbc534SSatish Balay   for (i = 0; i < m; i++) {
51876bd3646SJed Brown     if (PetscDefined(USE_DEBUG)) {
5195f80ce2aSJacob Faibussowitsch       PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
5205f80ce2aSJacob Faibussowitsch       PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
52176bd3646SJed Brown     }
5220bdbc534SSatish Balay     row = im[i];
523ab715e2cSSatish Balay     v_t = v + i * nbs2;
524c2760754SSatish Balay     if (row >= rstart && row < rend) {
5250bdbc534SSatish Balay       for (j = 0; j < n; j++) {
5260bdbc534SSatish Balay         col = in[j];
5270bdbc534SSatish Balay 
5280bdbc534SSatish Balay         /* Look up into the Hash Table */
529c2760754SSatish Balay         key = row * Nbs + col + 1;
530c2760754SSatish Balay         h1  = HASH(size, key, tmp);
5310bdbc534SSatish Balay 
532c2760754SSatish Balay         idx = h1;
53376bd3646SJed Brown         if (PetscDefined(USE_DEBUG)) {
534187ce0cbSSatish Balay           total_ct++;
535187ce0cbSSatish Balay           insert_ct++;
536187ce0cbSSatish Balay           if (HT[idx] != key) {
5379371c9d4SSatish Balay             for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
5389371c9d4SSatish Balay               ;
539187ce0cbSSatish Balay             if (idx == size) {
5409371c9d4SSatish Balay               for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
5419371c9d4SSatish Balay                 ;
5425f80ce2aSJacob Faibussowitsch               PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
543187ce0cbSSatish Balay             }
544187ce0cbSSatish Balay           }
54576bd3646SJed Brown         } else if (HT[idx] != key) {
5469371c9d4SSatish Balay           for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
5479371c9d4SSatish Balay             ;
548c2760754SSatish Balay           if (idx == size) {
5499371c9d4SSatish Balay             for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
5509371c9d4SSatish Balay               ;
5515f80ce2aSJacob Faibussowitsch             PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
552c2760754SSatish Balay           }
553c2760754SSatish Balay         }
554c2760754SSatish Balay         baij_a = HD[idx];
5550bdbc534SSatish Balay         if (roworiented) {
556c2760754SSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
557187ce0cbSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
558187ce0cbSSatish Balay           value = v_t;
559187ce0cbSSatish Balay           v_t += bs;
560fef45726SSatish Balay           if (addv == ADD_VALUES) {
561c2760754SSatish Balay             for (ii = 0; ii < bs; ii++, value += stepval) {
562ad540459SPierre Jolivet               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
563b4cc0f5aSSatish Balay             }
564fef45726SSatish Balay           } else {
565c2760754SSatish Balay             for (ii = 0; ii < bs; ii++, value += stepval) {
566ad540459SPierre Jolivet               for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
567fef45726SSatish Balay             }
568fef45726SSatish Balay           }
5690bdbc534SSatish Balay         } else {
5700bdbc534SSatish Balay           value = v + j * (stepval + bs) * bs + i * bs;
571fef45726SSatish Balay           if (addv == ADD_VALUES) {
572b4cc0f5aSSatish Balay             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
573ad540459SPierre Jolivet               for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
574fef45726SSatish Balay             }
575fef45726SSatish Balay           } else {
576fef45726SSatish Balay             for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
577ad540459SPierre Jolivet               for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
578b4cc0f5aSSatish Balay             }
5790bdbc534SSatish Balay           }
5800bdbc534SSatish Balay         }
5810bdbc534SSatish Balay       }
5820bdbc534SSatish Balay     } else {
5830bdbc534SSatish Balay       if (!baij->donotstash) {
5840bdbc534SSatish Balay         if (roworiented) {
5859566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5860bdbc534SSatish Balay         } else {
5879566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
5880bdbc534SSatish Balay         }
5890bdbc534SSatish Balay       }
5900bdbc534SSatish Balay     }
5910bdbc534SSatish Balay   }
59276bd3646SJed Brown   if (PetscDefined(USE_DEBUG)) {
593abf3b562SBarry Smith     baij->ht_total_ct += total_ct;
594abf3b562SBarry Smith     baij->ht_insert_ct += insert_ct;
59576bd3646SJed Brown   }
5960bdbc534SSatish Balay   PetscFunctionReturn(0);
5970bdbc534SSatish Balay }
598133cdb44SSatish Balay 
5999371c9d4SSatish Balay PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
600d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
601d0f46423SBarry Smith   PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
602d0f46423SBarry Smith   PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
603d6de1c52SSatish Balay 
604133cdb44SSatish Balay   PetscFunctionBegin;
605d6de1c52SSatish Balay   for (i = 0; i < m; i++) {
60654c59aa7SJacob Faibussowitsch     if (idxm[i] < 0) continue; /* negative row */
60754c59aa7SJacob Faibussowitsch     PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
608d6de1c52SSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
609d6de1c52SSatish Balay       row = idxm[i] - bsrstart;
610d6de1c52SSatish Balay       for (j = 0; j < n; j++) {
61154c59aa7SJacob Faibussowitsch         if (idxn[j] < 0) continue; /* negative column */
61254c59aa7SJacob Faibussowitsch         PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
613d6de1c52SSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend) {
614d6de1c52SSatish Balay           col = idxn[j] - bscstart;
6159566063dSJacob Faibussowitsch           PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
616d64ed03dSBarry Smith         } else {
61748a46eb9SPierre Jolivet           if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
618aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
6199566063dSJacob Faibussowitsch           PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
620fa46199cSSatish Balay           data--;
62148e59246SSatish Balay #else
62248e59246SSatish Balay           data = baij->colmap[idxn[j] / bs] - 1;
62348e59246SSatish Balay #endif
62448e59246SSatish Balay           if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
625d9d09a02SSatish Balay           else {
62648e59246SSatish Balay             col = data + idxn[j] % bs;
6279566063dSJacob Faibussowitsch             PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
628d6de1c52SSatish Balay           }
629d6de1c52SSatish Balay         }
630d6de1c52SSatish Balay       }
631f23aa3ddSBarry Smith     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
632d6de1c52SSatish Balay   }
6333a40ed3dSBarry Smith   PetscFunctionReturn(0);
634d6de1c52SSatish Balay }
635d6de1c52SSatish Balay 
6369371c9d4SSatish Balay PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) {
637d6de1c52SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
638d6de1c52SSatish Balay   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
639d0f46423SBarry Smith   PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
640329f5518SBarry Smith   PetscReal    sum = 0.0;
6413eda8832SBarry Smith   MatScalar   *v;
642d6de1c52SSatish Balay 
643d64ed03dSBarry Smith   PetscFunctionBegin;
644d6de1c52SSatish Balay   if (baij->size == 1) {
6459566063dSJacob Faibussowitsch     PetscCall(MatNorm(baij->A, type, nrm));
646d6de1c52SSatish Balay   } else {
647d6de1c52SSatish Balay     if (type == NORM_FROBENIUS) {
648d6de1c52SSatish Balay       v  = amat->a;
6498a62d963SHong Zhang       nz = amat->nz * bs2;
6508a62d963SHong Zhang       for (i = 0; i < nz; i++) {
6519371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
6529371c9d4SSatish Balay         v++;
653d6de1c52SSatish Balay       }
654d6de1c52SSatish Balay       v  = bmat->a;
6558a62d963SHong Zhang       nz = bmat->nz * bs2;
6568a62d963SHong Zhang       for (i = 0; i < nz; i++) {
6579371c9d4SSatish Balay         sum += PetscRealPart(PetscConj(*v) * (*v));
6589371c9d4SSatish Balay         v++;
659d6de1c52SSatish Balay       }
6601c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
6618f1a2a5eSBarry Smith       *nrm = PetscSqrtReal(*nrm);
6628a62d963SHong Zhang     } else if (type == NORM_1) { /* max column sum */
6638a62d963SHong Zhang       PetscReal *tmp, *tmp2;
664899cda47SBarry Smith       PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
6659566063dSJacob Faibussowitsch       PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
6669566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
6679371c9d4SSatish Balay       v  = amat->a;
6689371c9d4SSatish Balay       jj = amat->j;
6698a62d963SHong Zhang       for (i = 0; i < amat->nz; i++) {
6708a62d963SHong Zhang         for (j = 0; j < bs; j++) {
6718a62d963SHong Zhang           col = bs * (cstart + *jj) + j; /* column index */
6728a62d963SHong Zhang           for (row = 0; row < bs; row++) {
6739371c9d4SSatish Balay             tmp[col] += PetscAbsScalar(*v);
6749371c9d4SSatish Balay             v++;
6758a62d963SHong Zhang           }
6768a62d963SHong Zhang         }
6778a62d963SHong Zhang         jj++;
6788a62d963SHong Zhang       }
6799371c9d4SSatish Balay       v  = bmat->a;
6809371c9d4SSatish Balay       jj = bmat->j;
6818a62d963SHong Zhang       for (i = 0; i < bmat->nz; i++) {
6828a62d963SHong Zhang         for (j = 0; j < bs; j++) {
6838a62d963SHong Zhang           col = bs * garray[*jj] + j;
6848a62d963SHong Zhang           for (row = 0; row < bs; row++) {
6859371c9d4SSatish Balay             tmp[col] += PetscAbsScalar(*v);
6869371c9d4SSatish Balay             v++;
6878a62d963SHong Zhang           }
6888a62d963SHong Zhang         }
6898a62d963SHong Zhang         jj++;
6908a62d963SHong Zhang       }
6911c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
6928a62d963SHong Zhang       *nrm = 0.0;
693d0f46423SBarry Smith       for (j = 0; j < mat->cmap->N; j++) {
6948a62d963SHong Zhang         if (tmp2[j] > *nrm) *nrm = tmp2[j];
6958a62d963SHong Zhang       }
6969566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp));
6979566063dSJacob Faibussowitsch       PetscCall(PetscFree(tmp2));
6988a62d963SHong Zhang     } else if (type == NORM_INFINITY) { /* max row sum */
699577dd1f9SKris Buschelman       PetscReal *sums;
7009566063dSJacob Faibussowitsch       PetscCall(PetscMalloc1(bs, &sums));
7018a62d963SHong Zhang       sum = 0.0;
7028a62d963SHong Zhang       for (j = 0; j < amat->mbs; j++) {
7038a62d963SHong Zhang         for (row = 0; row < bs; row++) sums[row] = 0.0;
7048a62d963SHong Zhang         v  = amat->a + bs2 * amat->i[j];
7058a62d963SHong Zhang         nz = amat->i[j + 1] - amat->i[j];
7068a62d963SHong Zhang         for (i = 0; i < nz; i++) {
7078a62d963SHong Zhang           for (col = 0; col < bs; col++) {
7088a62d963SHong Zhang             for (row = 0; row < bs; row++) {
7099371c9d4SSatish Balay               sums[row] += PetscAbsScalar(*v);
7109371c9d4SSatish Balay               v++;
7118a62d963SHong Zhang             }
7128a62d963SHong Zhang           }
7138a62d963SHong Zhang         }
7148a62d963SHong Zhang         v  = bmat->a + bs2 * bmat->i[j];
7158a62d963SHong Zhang         nz = bmat->i[j + 1] - bmat->i[j];
7168a62d963SHong Zhang         for (i = 0; i < nz; i++) {
7178a62d963SHong Zhang           for (col = 0; col < bs; col++) {
7188a62d963SHong Zhang             for (row = 0; row < bs; row++) {
7199371c9d4SSatish Balay               sums[row] += PetscAbsScalar(*v);
7209371c9d4SSatish Balay               v++;
7218a62d963SHong Zhang             }
7228a62d963SHong Zhang           }
7238a62d963SHong Zhang         }
7248a62d963SHong Zhang         for (row = 0; row < bs; row++) {
7258a62d963SHong Zhang           if (sums[row] > sum) sum = sums[row];
7268a62d963SHong Zhang         }
7278a62d963SHong Zhang       }
7281c2dc1cbSBarry Smith       PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
7299566063dSJacob Faibussowitsch       PetscCall(PetscFree(sums));
730ce94432eSBarry Smith     } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
731d64ed03dSBarry Smith   }
7323a40ed3dSBarry Smith   PetscFunctionReturn(0);
733d6de1c52SSatish Balay }
73457b952d6SSatish Balay 
735fef45726SSatish Balay /*
736fef45726SSatish Balay   Creates the hash table, and sets the table
737fef45726SSatish Balay   This table is created only once.
738fef45726SSatish Balay   If new entried need to be added to the matrix
739fef45726SSatish Balay   then the hash table has to be destroyed and
740fef45726SSatish Balay   recreated.
741fef45726SSatish Balay */
7429371c9d4SSatish Balay PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) {
743596b8d2eSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
744596b8d2eSBarry Smith   Mat          A = baij->A, B = baij->B;
745596b8d2eSBarry Smith   Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
746b24ad042SBarry Smith   PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
747fca92195SBarry Smith   PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
748899cda47SBarry Smith   PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
749b24ad042SBarry Smith   PetscInt    *HT, key;
7503eda8832SBarry Smith   MatScalar  **HD;
751329f5518SBarry Smith   PetscReal    tmp;
7526cf91177SBarry Smith #if defined(PETSC_USE_INFO)
753b24ad042SBarry Smith   PetscInt ct = 0, max = 0;
7544a15367fSSatish Balay #endif
755fef45726SSatish Balay 
756d64ed03dSBarry Smith   PetscFunctionBegin;
757fca92195SBarry Smith   if (baij->ht) PetscFunctionReturn(0);
758fef45726SSatish Balay 
759fca92195SBarry Smith   baij->ht_size = (PetscInt)(factor * nz);
760fca92195SBarry Smith   ht_size       = baij->ht_size;
7610bdbc534SSatish Balay 
762fef45726SSatish Balay   /* Allocate Memory for Hash Table */
7639566063dSJacob Faibussowitsch   PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
764b9e4cc15SSatish Balay   HD = baij->hd;
765a07cd24cSSatish Balay   HT = baij->ht;
766b9e4cc15SSatish Balay 
767596b8d2eSBarry Smith   /* Loop Over A */
7680bdbc534SSatish Balay   for (i = 0; i < a->mbs; i++) {
769596b8d2eSBarry Smith     for (j = ai[i]; j < ai[i + 1]; j++) {
7700bdbc534SSatish Balay       row = i + rstart;
7710bdbc534SSatish Balay       col = aj[j] + cstart;
772596b8d2eSBarry Smith 
773187ce0cbSSatish Balay       key = row * Nbs + col + 1;
774fca92195SBarry Smith       h1  = HASH(ht_size, key, tmp);
775fca92195SBarry Smith       for (k = 0; k < ht_size; k++) {
776fca92195SBarry Smith         if (!HT[(h1 + k) % ht_size]) {
777fca92195SBarry Smith           HT[(h1 + k) % ht_size] = key;
778fca92195SBarry Smith           HD[(h1 + k) % ht_size] = a->a + j * bs2;
779596b8d2eSBarry Smith           break;
7806cf91177SBarry Smith #if defined(PETSC_USE_INFO)
781187ce0cbSSatish Balay         } else {
782187ce0cbSSatish Balay           ct++;
783187ce0cbSSatish Balay #endif
784596b8d2eSBarry Smith         }
785187ce0cbSSatish Balay       }
7866cf91177SBarry Smith #if defined(PETSC_USE_INFO)
787187ce0cbSSatish Balay       if (k > max) max = k;
788187ce0cbSSatish Balay #endif
789596b8d2eSBarry Smith     }
790596b8d2eSBarry Smith   }
791596b8d2eSBarry Smith   /* Loop Over B */
7920bdbc534SSatish Balay   for (i = 0; i < b->mbs; i++) {
793596b8d2eSBarry Smith     for (j = bi[i]; j < bi[i + 1]; j++) {
7940bdbc534SSatish Balay       row = i + rstart;
7950bdbc534SSatish Balay       col = garray[bj[j]];
796187ce0cbSSatish Balay       key = row * Nbs + col + 1;
797fca92195SBarry Smith       h1  = HASH(ht_size, key, tmp);
798fca92195SBarry Smith       for (k = 0; k < ht_size; k++) {
799fca92195SBarry Smith         if (!HT[(h1 + k) % ht_size]) {
800fca92195SBarry Smith           HT[(h1 + k) % ht_size] = key;
801fca92195SBarry Smith           HD[(h1 + k) % ht_size] = b->a + j * bs2;
802596b8d2eSBarry Smith           break;
8036cf91177SBarry Smith #if defined(PETSC_USE_INFO)
804187ce0cbSSatish Balay         } else {
805187ce0cbSSatish Balay           ct++;
806187ce0cbSSatish Balay #endif
807596b8d2eSBarry Smith         }
808187ce0cbSSatish Balay       }
8096cf91177SBarry Smith #if defined(PETSC_USE_INFO)
810187ce0cbSSatish Balay       if (k > max) max = k;
811187ce0cbSSatish Balay #endif
812596b8d2eSBarry Smith     }
813596b8d2eSBarry Smith   }
814596b8d2eSBarry Smith 
815596b8d2eSBarry Smith   /* Print Summary */
8166cf91177SBarry Smith #if defined(PETSC_USE_INFO)
817fca92195SBarry Smith   for (i = 0, j = 0; i < ht_size; i++) {
81826fbe8dcSKarl Rupp     if (HT[i]) j++;
819c38d4ed2SBarry Smith   }
8209566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
821187ce0cbSSatish Balay #endif
8223a40ed3dSBarry Smith   PetscFunctionReturn(0);
823596b8d2eSBarry Smith }
82457b952d6SSatish Balay 
8259371c9d4SSatish Balay PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) {
826bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
827b24ad042SBarry Smith   PetscInt     nstash, reallocs;
828bbb85fb3SSatish Balay 
829bbb85fb3SSatish Balay   PetscFunctionBegin;
83026fbe8dcSKarl Rupp   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
831bbb85fb3SSatish Balay 
8329566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
8339566063dSJacob Faibussowitsch   PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
8349566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
8359566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
8369566063dSJacob Faibussowitsch   PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
8379566063dSJacob Faibussowitsch   PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
838bbb85fb3SSatish Balay   PetscFunctionReturn(0);
839bbb85fb3SSatish Balay }
840bbb85fb3SSatish Balay 
8419371c9d4SSatish Balay PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) {
842bbb85fb3SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
84391c97fd4SSatish Balay   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
844b24ad042SBarry Smith   PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
845e44c0bd4SBarry Smith   PetscInt    *row, *col;
846ace3abfcSBarry Smith   PetscBool    r1, r2, r3, other_disassembled;
8473eda8832SBarry Smith   MatScalar   *val;
848b24ad042SBarry Smith   PetscMPIInt  n;
849bbb85fb3SSatish Balay 
850bbb85fb3SSatish Balay   PetscFunctionBegin;
8515fd66863SKarl Rupp   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
8524cb17eb5SBarry Smith   if (!baij->donotstash && !mat->nooffprocentries) {
853a2d1c673SSatish Balay     while (1) {
8549566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
855a2d1c673SSatish Balay       if (!flg) break;
856a2d1c673SSatish Balay 
857bbb85fb3SSatish Balay       for (i = 0; i < n;) {
858bbb85fb3SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
85926fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
86026fbe8dcSKarl Rupp           if (row[j] != rstart) break;
86126fbe8dcSKarl Rupp         }
862bbb85fb3SSatish Balay         if (j < n) ncols = j - i;
863bbb85fb3SSatish Balay         else ncols = n - i;
864bbb85fb3SSatish Balay         /* Now assemble all these values with a single function call */
8659566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
866bbb85fb3SSatish Balay         i = j;
867bbb85fb3SSatish Balay       }
868bbb85fb3SSatish Balay     }
8699566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->stash));
870a2d1c673SSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
871a2d1c673SSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
872a2d1c673SSatish Balay        restore the original flags */
873a2d1c673SSatish Balay     r1 = baij->roworiented;
874a2d1c673SSatish Balay     r2 = a->roworiented;
87591c97fd4SSatish Balay     r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
87626fbe8dcSKarl Rupp 
8777c922b88SBarry Smith     baij->roworiented = PETSC_FALSE;
8787c922b88SBarry Smith     a->roworiented    = PETSC_FALSE;
87926fbe8dcSKarl Rupp 
88091c97fd4SSatish Balay     (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
881a2d1c673SSatish Balay     while (1) {
8829566063dSJacob Faibussowitsch       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
883a2d1c673SSatish Balay       if (!flg) break;
884a2d1c673SSatish Balay 
885a2d1c673SSatish Balay       for (i = 0; i < n;) {
886a2d1c673SSatish Balay         /* Now identify the consecutive vals belonging to the same row */
88726fbe8dcSKarl Rupp         for (j = i, rstart = row[j]; j < n; j++) {
88826fbe8dcSKarl Rupp           if (row[j] != rstart) break;
88926fbe8dcSKarl Rupp         }
890a2d1c673SSatish Balay         if (j < n) ncols = j - i;
891a2d1c673SSatish Balay         else ncols = n - i;
8929566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
893a2d1c673SSatish Balay         i = j;
894a2d1c673SSatish Balay       }
895a2d1c673SSatish Balay     }
8969566063dSJacob Faibussowitsch     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
89726fbe8dcSKarl Rupp 
898a2d1c673SSatish Balay     baij->roworiented = r1;
899a2d1c673SSatish Balay     a->roworiented    = r2;
90026fbe8dcSKarl Rupp 
90191c97fd4SSatish Balay     ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
902bbb85fb3SSatish Balay   }
903bbb85fb3SSatish Balay 
9049566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->A, mode));
9059566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->A, mode));
906bbb85fb3SSatish Balay 
907bbb85fb3SSatish Balay   /* determine if any processor has disassembled, if so we must
9086aad120cSJose E. Roman      also disassemble ourselves, in order that we may reassemble. */
909bbb85fb3SSatish Balay   /*
910bbb85fb3SSatish Balay      if nonzero structure of submatrix B cannot change then we know that
911bbb85fb3SSatish Balay      no processor disassembled thus we can skip this stuff
912bbb85fb3SSatish Balay   */
913bbb85fb3SSatish Balay   if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
9145f9db2b2SJunchao Zhang     PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
91548a46eb9SPierre Jolivet     if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
916bbb85fb3SSatish Balay   }
917bbb85fb3SSatish Balay 
91848a46eb9SPierre Jolivet   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
9199566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(baij->B, mode));
9209566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(baij->B, mode));
921bbb85fb3SSatish Balay 
9226cf91177SBarry Smith #if defined(PETSC_USE_INFO)
923bbb85fb3SSatish Balay   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
9249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));
92526fbe8dcSKarl Rupp 
926bbb85fb3SSatish Balay     baij->ht_total_ct  = 0;
927bbb85fb3SSatish Balay     baij->ht_insert_ct = 0;
928bbb85fb3SSatish Balay   }
929bbb85fb3SSatish Balay #endif
930bbb85fb3SSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
9319566063dSJacob Faibussowitsch     PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));
93226fbe8dcSKarl Rupp 
933bbb85fb3SSatish Balay     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
934bbb85fb3SSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
935bbb85fb3SSatish Balay   }
936bbb85fb3SSatish Balay 
9379566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
93826fbe8dcSKarl Rupp 
939f4259b30SLisandro Dalcin   baij->rowvalues = NULL;
9404f9cfa9eSBarry Smith 
9414f9cfa9eSBarry Smith   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
9424f9cfa9eSBarry Smith   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
943e56f5c9eSBarry Smith     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
9441c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
945e56f5c9eSBarry Smith   }
946bbb85fb3SSatish Balay   PetscFunctionReturn(0);
947bbb85fb3SSatish Balay }
94857b952d6SSatish Balay 
9497da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
9509804daf3SBarry Smith #include <petscdraw.h>
9519371c9d4SSatish Balay static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
95257b952d6SSatish Balay   Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
9537da1fb6eSBarry Smith   PetscMPIInt       rank = baij->rank;
954d0f46423SBarry Smith   PetscInt          bs   = mat->rmap->bs;
955ace3abfcSBarry Smith   PetscBool         iascii, isdraw;
956b0a32e0cSBarry Smith   PetscViewer       sviewer;
957f3ef73ceSBarry Smith   PetscViewerFormat format;
95857b952d6SSatish Balay 
959d64ed03dSBarry Smith   PetscFunctionBegin;
9609566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
9619566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
96232077d6dSBarry Smith   if (iascii) {
9639566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
964456192e2SBarry Smith     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
9654e220ebcSLois Curfman McInnes       MatInfo info;
9669566063dSJacob Faibussowitsch       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
9679566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
9689566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
9699371c9d4SSatish Balay       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
9709371c9d4SSatish Balay                                                    mat->rmap->bs, (double)info.memory));
9719566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
9729566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9739566063dSJacob Faibussowitsch       PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
9749566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
9759566063dSJacob Faibussowitsch       PetscCall(PetscViewerFlush(viewer));
9769566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
9779566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
9789566063dSJacob Faibussowitsch       PetscCall(VecScatterView(baij->Mvctx, viewer));
9793a40ed3dSBarry Smith       PetscFunctionReturn(0);
980fb9695e5SSatish Balay     } else if (format == PETSC_VIEWER_ASCII_INFO) {
9819566063dSJacob Faibussowitsch       PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
9823a40ed3dSBarry Smith       PetscFunctionReturn(0);
98304929863SHong Zhang     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
98404929863SHong Zhang       PetscFunctionReturn(0);
98557b952d6SSatish Balay     }
98657b952d6SSatish Balay   }
98757b952d6SSatish Balay 
9880f5bd95cSBarry Smith   if (isdraw) {
989b0a32e0cSBarry Smith     PetscDraw draw;
990ace3abfcSBarry Smith     PetscBool isnull;
9919566063dSJacob Faibussowitsch     PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
9929566063dSJacob Faibussowitsch     PetscCall(PetscDrawIsNull(draw, &isnull));
99345f3bb6eSLisandro Dalcin     if (isnull) PetscFunctionReturn(0);
99457b952d6SSatish Balay   }
99557b952d6SSatish Balay 
9967da1fb6eSBarry Smith   {
99757b952d6SSatish Balay     /* assemble the entire matrix onto first processor. */
99857b952d6SSatish Balay     Mat          A;
99957b952d6SSatish Balay     Mat_SeqBAIJ *Aloc;
1000d0f46423SBarry Smith     PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
10013eda8832SBarry Smith     MatScalar   *a;
10023e219373SBarry Smith     const char  *matname;
100357b952d6SSatish Balay 
1004f204ca49SKris Buschelman     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1005f204ca49SKris Buschelman     /* Perhaps this should be the type of mat? */
10069566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
1007dd400576SPatrick Sanan     if (rank == 0) {
10089566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, M, N, M, N));
1009d64ed03dSBarry Smith     } else {
10109566063dSJacob Faibussowitsch       PetscCall(MatSetSizes(A, 0, 0, M, N));
101157b952d6SSatish Balay     }
10129566063dSJacob Faibussowitsch     PetscCall(MatSetType(A, MATMPIBAIJ));
10139566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
10149566063dSJacob Faibussowitsch     PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
101557b952d6SSatish Balay 
101657b952d6SSatish Balay     /* copy over the A part */
101757b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ *)baij->A->data;
10189371c9d4SSatish Balay     ai   = Aloc->i;
10199371c9d4SSatish Balay     aj   = Aloc->j;
10209371c9d4SSatish Balay     a    = Aloc->a;
10219566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs, &rvals));
102257b952d6SSatish Balay 
102357b952d6SSatish Balay     for (i = 0; i < mbs; i++) {
1024899cda47SBarry Smith       rvals[0] = bs * (baij->rstartbs + i);
102526fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
102657b952d6SSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
1027899cda47SBarry Smith         col = (baij->cstartbs + aj[j]) * bs;
102857b952d6SSatish Balay         for (k = 0; k < bs; k++) {
10299566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
10309371c9d4SSatish Balay           col++;
10319371c9d4SSatish Balay           a += bs;
103257b952d6SSatish Balay         }
103357b952d6SSatish Balay       }
103457b952d6SSatish Balay     }
103557b952d6SSatish Balay     /* copy over the B part */
103657b952d6SSatish Balay     Aloc = (Mat_SeqBAIJ *)baij->B->data;
10379371c9d4SSatish Balay     ai   = Aloc->i;
10389371c9d4SSatish Balay     aj   = Aloc->j;
10399371c9d4SSatish Balay     a    = Aloc->a;
104057b952d6SSatish Balay     for (i = 0; i < mbs; i++) {
1041899cda47SBarry Smith       rvals[0] = bs * (baij->rstartbs + i);
104226fbe8dcSKarl Rupp       for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
104357b952d6SSatish Balay       for (j = ai[i]; j < ai[i + 1]; j++) {
104457b952d6SSatish Balay         col = baij->garray[aj[j]] * bs;
104557b952d6SSatish Balay         for (k = 0; k < bs; k++) {
10469566063dSJacob Faibussowitsch           PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
10479371c9d4SSatish Balay           col++;
10489371c9d4SSatish Balay           a += bs;
104957b952d6SSatish Balay         }
105057b952d6SSatish Balay       }
105157b952d6SSatish Balay     }
10529566063dSJacob Faibussowitsch     PetscCall(PetscFree(rvals));
10539566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
10549566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
105555843e3eSBarry Smith     /*
105655843e3eSBarry Smith        Everyone has to call to draw the matrix since the graphics waits are
1057b0a32e0cSBarry Smith        synchronized across all processors that share the PetscDraw object
105855843e3eSBarry Smith     */
10599566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
10609566063dSJacob Faibussowitsch     PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1061dd400576SPatrick Sanan     if (rank == 0) {
10629566063dSJacob Faibussowitsch       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
10639566063dSJacob Faibussowitsch       PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
106457b952d6SSatish Balay     }
10659566063dSJacob Faibussowitsch     PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
10669566063dSJacob Faibussowitsch     PetscCall(PetscViewerFlush(viewer));
10679566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&A));
106857b952d6SSatish Balay   }
10693a40ed3dSBarry Smith   PetscFunctionReturn(0);
107057b952d6SSatish Balay }
107157b952d6SSatish Balay 
1072618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
10739371c9d4SSatish Balay PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
1074b51a4376SLisandro Dalcin   Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
1075b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
1076b51a4376SLisandro Dalcin   Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
1077b51a4376SLisandro Dalcin   const PetscInt *garray = aij->garray;
1078b51a4376SLisandro Dalcin   PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
1079b51a4376SLisandro Dalcin   PetscInt       *rowlens, *colidxs;
1080b51a4376SLisandro Dalcin   PetscScalar    *matvals;
1081660746e0SBarry Smith 
1082660746e0SBarry Smith   PetscFunctionBegin;
10839566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
1084b51a4376SLisandro Dalcin 
1085b51a4376SLisandro Dalcin   M  = mat->rmap->N;
1086b51a4376SLisandro Dalcin   N  = mat->cmap->N;
1087b51a4376SLisandro Dalcin   m  = mat->rmap->n;
1088b51a4376SLisandro Dalcin   rs = mat->rmap->rstart;
1089b51a4376SLisandro Dalcin   cs = mat->cmap->rstart;
1090b51a4376SLisandro Dalcin   bs = mat->rmap->bs;
1091b51a4376SLisandro Dalcin   nz = bs * bs * (A->nz + B->nz);
1092b51a4376SLisandro Dalcin 
1093b51a4376SLisandro Dalcin   /* write matrix header */
1094660746e0SBarry Smith   header[0] = MAT_FILE_CLASSID;
10959371c9d4SSatish Balay   header[1] = M;
10969371c9d4SSatish Balay   header[2] = N;
10979371c9d4SSatish Balay   header[3] = nz;
10989566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
10999566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));
1100660746e0SBarry Smith 
1101b51a4376SLisandro Dalcin   /* fill in and store row lengths */
11029566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &rowlens));
1103b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++)
11049371c9d4SSatish Balay     for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
11059566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
11069566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowlens));
1107660746e0SBarry Smith 
1108b51a4376SLisandro Dalcin   /* fill in and store column indices */
11099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &colidxs));
1110b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++) {
1111b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++) {
1112b51a4376SLisandro Dalcin       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1113b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs / bs) break;
11149371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1115660746e0SBarry Smith       }
1116b51a4376SLisandro Dalcin       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
11179371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
1118b51a4376SLisandro Dalcin       for (; jb < B->i[i + 1]; jb++)
11199371c9d4SSatish Balay         for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
1120660746e0SBarry Smith     }
1121660746e0SBarry Smith   }
11225f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
11239566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
11249566063dSJacob Faibussowitsch   PetscCall(PetscFree(colidxs));
1125660746e0SBarry Smith 
1126b51a4376SLisandro Dalcin   /* fill in and store nonzero values */
11279566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(nz, &matvals));
1128b51a4376SLisandro Dalcin   for (cnt = 0, i = 0; i < A->mbs; i++) {
1129b51a4376SLisandro Dalcin     for (k = 0; k < bs; k++) {
1130b51a4376SLisandro Dalcin       for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
1131b51a4376SLisandro Dalcin         if (garray[B->j[jb]] > cs / bs) break;
11329371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1133660746e0SBarry Smith       }
1134b51a4376SLisandro Dalcin       for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
11359371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
1136b51a4376SLisandro Dalcin       for (; jb < B->i[i + 1]; jb++)
11379371c9d4SSatish Balay         for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
1138660746e0SBarry Smith     }
1139b51a4376SLisandro Dalcin   }
11409566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
11419566063dSJacob Faibussowitsch   PetscCall(PetscFree(matvals));
1142660746e0SBarry Smith 
1143b51a4376SLisandro Dalcin   /* write block size option to the viewer's .info file */
11449566063dSJacob Faibussowitsch   PetscCall(MatView_Binary_BlockSizes(mat, viewer));
1145660746e0SBarry Smith   PetscFunctionReturn(0);
1146660746e0SBarry Smith }
1147660746e0SBarry Smith 
11489371c9d4SSatish Balay PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) {
1149ace3abfcSBarry Smith   PetscBool iascii, isdraw, issocket, isbinary;
115057b952d6SSatish Balay 
1151d64ed03dSBarry Smith   PetscFunctionBegin;
11529566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
11539566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
11549566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
11559566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1156660746e0SBarry Smith   if (iascii || isdraw || issocket) {
11579566063dSJacob Faibussowitsch     PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
11581baa6e33SBarry Smith   } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
11593a40ed3dSBarry Smith   PetscFunctionReturn(0);
116057b952d6SSatish Balay }
116157b952d6SSatish Balay 
11629371c9d4SSatish Balay PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) {
116379bdfe76SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
116479bdfe76SSatish Balay 
1165d64ed03dSBarry Smith   PetscFunctionBegin;
1166aa482453SBarry Smith #if defined(PETSC_USE_LOG)
1167c0aa6a63SJacob Faibussowitsch   PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
116879bdfe76SSatish Balay #endif
11699566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->stash));
11709566063dSJacob Faibussowitsch   PetscCall(MatStashDestroy_Private(&mat->bstash));
11719566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->A));
11729566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&baij->B));
1173aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
11749566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&baij->colmap));
117548e59246SSatish Balay #else
11769566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->colmap));
117748e59246SSatish Balay #endif
11789566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->garray));
11799566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&baij->lvec));
11809566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&baij->Mvctx));
11819566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
11829566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->barray));
11839566063dSJacob Faibussowitsch   PetscCall(PetscFree2(baij->hd, baij->ht));
11849566063dSJacob Faibussowitsch   PetscCall(PetscFree(baij->rangebs));
11859566063dSJacob Faibussowitsch   PetscCall(PetscFree(mat->data));
1186901853e0SKris Buschelman 
11879566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
11889566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
11899566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
11909566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
11919566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
11929566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
11939566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
11949566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
11952e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
11962e956fe4SStefano Zampini   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
11977ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
11989566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
11997ea3e4caSstefano_zampini #endif
12009566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
12013a40ed3dSBarry Smith   PetscFunctionReturn(0);
120279bdfe76SSatish Balay }
120379bdfe76SSatish Balay 
12049371c9d4SSatish Balay PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) {
1205cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1206b24ad042SBarry Smith   PetscInt     nt;
1207cee3aa6bSSatish Balay 
1208d64ed03dSBarry Smith   PetscFunctionBegin;
12099566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(xx, &nt));
12105f80ce2aSJacob Faibussowitsch   PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
12119566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(yy, &nt));
12125f80ce2aSJacob Faibussowitsch   PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy");
12139566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
12149566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->mult)(a->A, xx, yy));
12159566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
12169566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
12173a40ed3dSBarry Smith   PetscFunctionReturn(0);
1218cee3aa6bSSatish Balay }
1219cee3aa6bSSatish Balay 
12209371c9d4SSatish Balay PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1221cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1222d64ed03dSBarry Smith 
1223d64ed03dSBarry Smith   PetscFunctionBegin;
12249566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
12259566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
12269566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
12279566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
12283a40ed3dSBarry Smith   PetscFunctionReturn(0);
1229cee3aa6bSSatish Balay }
1230cee3aa6bSSatish Balay 
12319371c9d4SSatish Balay PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) {
1232cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1233cee3aa6bSSatish Balay 
1234d64ed03dSBarry Smith   PetscFunctionBegin;
1235cee3aa6bSSatish Balay   /* do nondiagonal part */
12369566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1237cee3aa6bSSatish Balay   /* do local part */
12389566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
1239e4a140f6SJunchao Zhang   /* add partial results together */
12409566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
12419566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
12423a40ed3dSBarry Smith   PetscFunctionReturn(0);
1243cee3aa6bSSatish Balay }
1244cee3aa6bSSatish Balay 
12459371c9d4SSatish Balay PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) {
1246cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1247cee3aa6bSSatish Balay 
1248d64ed03dSBarry Smith   PetscFunctionBegin;
1249cee3aa6bSSatish Balay   /* do nondiagonal part */
12509566063dSJacob Faibussowitsch   PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
1251cee3aa6bSSatish Balay   /* do local part */
12529566063dSJacob Faibussowitsch   PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
1253e4a140f6SJunchao Zhang   /* add partial results together */
12549566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
12559566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
12563a40ed3dSBarry Smith   PetscFunctionReturn(0);
1257cee3aa6bSSatish Balay }
1258cee3aa6bSSatish Balay 
1259cee3aa6bSSatish Balay /*
1260cee3aa6bSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1261cee3aa6bSSatish Balay    diagonal block
1262cee3aa6bSSatish Balay */
12639371c9d4SSatish Balay PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) {
1264d64ed03dSBarry Smith   PetscFunctionBegin;
12655f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
12669566063dSJacob Faibussowitsch   PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
12673a40ed3dSBarry Smith   PetscFunctionReturn(0);
1268cee3aa6bSSatish Balay }
1269cee3aa6bSSatish Balay 
12709371c9d4SSatish Balay PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) {
1271cee3aa6bSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1272d64ed03dSBarry Smith 
1273d64ed03dSBarry Smith   PetscFunctionBegin;
12749566063dSJacob Faibussowitsch   PetscCall(MatScale(a->A, aa));
12759566063dSJacob Faibussowitsch   PetscCall(MatScale(a->B, aa));
12763a40ed3dSBarry Smith   PetscFunctionReturn(0);
1277cee3aa6bSSatish Balay }
1278026e39d0SSatish Balay 
12799371c9d4SSatish Balay PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1280acdf5bf4SSatish Balay   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
128187828ca2SBarry Smith   PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1282d0f46423SBarry Smith   PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1283d0f46423SBarry Smith   PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1284899cda47SBarry Smith   PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;
1285acdf5bf4SSatish Balay 
1286d64ed03dSBarry Smith   PetscFunctionBegin;
12875f80ce2aSJacob Faibussowitsch   PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
12885f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1289acdf5bf4SSatish Balay   mat->getrowactive = PETSC_TRUE;
1290acdf5bf4SSatish Balay 
1291acdf5bf4SSatish Balay   if (!mat->rowvalues && (idx || v)) {
1292acdf5bf4SSatish Balay     /*
1293acdf5bf4SSatish Balay         allocate enough space to hold information from the longest row.
1294acdf5bf4SSatish Balay     */
1295acdf5bf4SSatish Balay     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
1296b24ad042SBarry Smith     PetscInt     max = 1, mbs = mat->mbs, tmp;
1297bd16c2feSSatish Balay     for (i = 0; i < mbs; i++) {
1298acdf5bf4SSatish Balay       tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
129926fbe8dcSKarl Rupp       if (max < tmp) max = tmp;
1300acdf5bf4SSatish Balay     }
13019566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1302acdf5bf4SSatish Balay   }
1303d9d09a02SSatish Balay   lrow = row - brstart;
1304acdf5bf4SSatish Balay 
13059371c9d4SSatish Balay   pvA = &vworkA;
13069371c9d4SSatish Balay   pcA = &cworkA;
13079371c9d4SSatish Balay   pvB = &vworkB;
13089371c9d4SSatish Balay   pcB = &cworkB;
13099371c9d4SSatish Balay   if (!v) {
13109371c9d4SSatish Balay     pvA = NULL;
13119371c9d4SSatish Balay     pvB = NULL;
13129371c9d4SSatish Balay   }
13139371c9d4SSatish Balay   if (!idx) {
13149371c9d4SSatish Balay     pcA = NULL;
13159371c9d4SSatish Balay     if (!v) pcB = NULL;
13169371c9d4SSatish Balay   }
13179566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
13189566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1319acdf5bf4SSatish Balay   nztot = nzA + nzB;
1320acdf5bf4SSatish Balay 
1321acdf5bf4SSatish Balay   cmap = mat->garray;
1322acdf5bf4SSatish Balay   if (v || idx) {
1323acdf5bf4SSatish Balay     if (nztot) {
1324acdf5bf4SSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1325b24ad042SBarry Smith       PetscInt imark = -1;
1326acdf5bf4SSatish Balay       if (v) {
1327acdf5bf4SSatish Balay         *v = v_p = mat->rowvalues;
1328acdf5bf4SSatish Balay         for (i = 0; i < nzB; i++) {
1329d9d09a02SSatish Balay           if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1330acdf5bf4SSatish Balay           else break;
1331acdf5bf4SSatish Balay         }
1332acdf5bf4SSatish Balay         imark = i;
1333acdf5bf4SSatish Balay         for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1334acdf5bf4SSatish Balay         for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1335acdf5bf4SSatish Balay       }
1336acdf5bf4SSatish Balay       if (idx) {
1337acdf5bf4SSatish Balay         *idx = idx_p = mat->rowindices;
1338acdf5bf4SSatish Balay         if (imark > -1) {
1339ad540459SPierre Jolivet           for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1340acdf5bf4SSatish Balay         } else {
1341acdf5bf4SSatish Balay           for (i = 0; i < nzB; i++) {
134226fbe8dcSKarl Rupp             if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1343acdf5bf4SSatish Balay             else break;
1344acdf5bf4SSatish Balay           }
1345acdf5bf4SSatish Balay           imark = i;
1346acdf5bf4SSatish Balay         }
1347d9d09a02SSatish Balay         for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1348d9d09a02SSatish Balay         for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1349acdf5bf4SSatish Balay       }
1350d64ed03dSBarry Smith     } else {
1351f4259b30SLisandro Dalcin       if (idx) *idx = NULL;
1352f4259b30SLisandro Dalcin       if (v) *v = NULL;
1353d212a18eSSatish Balay     }
1354acdf5bf4SSatish Balay   }
1355acdf5bf4SSatish Balay   *nz = nztot;
13569566063dSJacob Faibussowitsch   PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
13579566063dSJacob Faibussowitsch   PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
13583a40ed3dSBarry Smith   PetscFunctionReturn(0);
1359acdf5bf4SSatish Balay }
1360acdf5bf4SSatish Balay 
13619371c9d4SSatish Balay PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
1362acdf5bf4SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
1363d64ed03dSBarry Smith 
1364d64ed03dSBarry Smith   PetscFunctionBegin;
13655f80ce2aSJacob Faibussowitsch   PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
1366acdf5bf4SSatish Balay   baij->getrowactive = PETSC_FALSE;
13673a40ed3dSBarry Smith   PetscFunctionReturn(0);
1368acdf5bf4SSatish Balay }
1369acdf5bf4SSatish Balay 
13709371c9d4SSatish Balay PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) {
137158667388SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
1372d64ed03dSBarry Smith 
1373d64ed03dSBarry Smith   PetscFunctionBegin;
13749566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->A));
13759566063dSJacob Faibussowitsch   PetscCall(MatZeroEntries(l->B));
13763a40ed3dSBarry Smith   PetscFunctionReturn(0);
137758667388SSatish Balay }
13780ac07820SSatish Balay 
13799371c9d4SSatish Balay PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
13804e220ebcSLois Curfman McInnes   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
13814e220ebcSLois Curfman McInnes   Mat            A = a->A, B = a->B;
13823966268fSBarry Smith   PetscLogDouble isend[5], irecv[5];
13830ac07820SSatish Balay 
1384d64ed03dSBarry Smith   PetscFunctionBegin;
1385d0f46423SBarry Smith   info->block_size = (PetscReal)matin->rmap->bs;
138626fbe8dcSKarl Rupp 
13879566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(A, MAT_LOCAL, info));
138826fbe8dcSKarl Rupp 
13899371c9d4SSatish Balay   isend[0] = info->nz_used;
13909371c9d4SSatish Balay   isend[1] = info->nz_allocated;
13919371c9d4SSatish Balay   isend[2] = info->nz_unneeded;
13929371c9d4SSatish Balay   isend[3] = info->memory;
13939371c9d4SSatish Balay   isend[4] = info->mallocs;
139426fbe8dcSKarl Rupp 
13959566063dSJacob Faibussowitsch   PetscCall(MatGetInfo(B, MAT_LOCAL, info));
139626fbe8dcSKarl Rupp 
13979371c9d4SSatish Balay   isend[0] += info->nz_used;
13989371c9d4SSatish Balay   isend[1] += info->nz_allocated;
13999371c9d4SSatish Balay   isend[2] += info->nz_unneeded;
14009371c9d4SSatish Balay   isend[3] += info->memory;
14019371c9d4SSatish Balay   isend[4] += info->mallocs;
140226fbe8dcSKarl Rupp 
14030ac07820SSatish Balay   if (flag == MAT_LOCAL) {
14044e220ebcSLois Curfman McInnes     info->nz_used      = isend[0];
14054e220ebcSLois Curfman McInnes     info->nz_allocated = isend[1];
14064e220ebcSLois Curfman McInnes     info->nz_unneeded  = isend[2];
14074e220ebcSLois Curfman McInnes     info->memory       = isend[3];
14084e220ebcSLois Curfman McInnes     info->mallocs      = isend[4];
14090ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
14101c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
141126fbe8dcSKarl Rupp 
14124e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14134e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14144e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14154e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14164e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
14170ac07820SSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
14181c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
141926fbe8dcSKarl Rupp 
14204e220ebcSLois Curfman McInnes     info->nz_used      = irecv[0];
14214e220ebcSLois Curfman McInnes     info->nz_allocated = irecv[1];
14224e220ebcSLois Curfman McInnes     info->nz_unneeded  = irecv[2];
14234e220ebcSLois Curfman McInnes     info->memory       = irecv[3];
14244e220ebcSLois Curfman McInnes     info->mallocs      = irecv[4];
142598921bdaSJacob Faibussowitsch   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
14264e220ebcSLois Curfman McInnes   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
14274e220ebcSLois Curfman McInnes   info->fill_ratio_needed = 0;
14284e220ebcSLois Curfman McInnes   info->factor_mallocs    = 0;
14293a40ed3dSBarry Smith   PetscFunctionReturn(0);
14300ac07820SSatish Balay }
14310ac07820SSatish Balay 
14329371c9d4SSatish Balay PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) {
143358667388SSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
143458667388SSatish Balay 
1435d64ed03dSBarry Smith   PetscFunctionBegin;
143612c028f9SKris Buschelman   switch (op) {
1437512a5fc5SBarry Smith   case MAT_NEW_NONZERO_LOCATIONS:
143812c028f9SKris Buschelman   case MAT_NEW_NONZERO_ALLOCATION_ERR:
143928b2fa4aSMatthew Knepley   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1440a9817697SBarry Smith   case MAT_KEEP_NONZERO_PATTERN:
144112c028f9SKris Buschelman   case MAT_NEW_NONZERO_LOCATION_ERR:
144243674050SBarry Smith     MatCheckPreallocated(A, 1);
14439566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14449566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
144512c028f9SKris Buschelman     break;
144612c028f9SKris Buschelman   case MAT_ROW_ORIENTED:
144743674050SBarry Smith     MatCheckPreallocated(A, 1);
14484e0d8c25SBarry Smith     a->roworiented = flg;
144926fbe8dcSKarl Rupp 
14509566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->A, op, flg));
14519566063dSJacob Faibussowitsch     PetscCall(MatSetOption(a->B, op, flg));
145212c028f9SKris Buschelman     break;
14538c78258cSHong Zhang   case MAT_FORCE_DIAGONAL_ENTRIES:
14549371c9d4SSatish Balay   case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
14559371c9d4SSatish Balay   case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
145612c028f9SKris Buschelman   case MAT_USE_HASH_TABLE:
14574e0d8c25SBarry Smith     a->ht_flag = flg;
1458abf3b562SBarry Smith     a->ht_fact = 1.39;
145912c028f9SKris Buschelman     break;
146077e54ba9SKris Buschelman   case MAT_SYMMETRIC:
146177e54ba9SKris Buschelman   case MAT_STRUCTURALLY_SYMMETRIC:
14622188ac68SBarry Smith   case MAT_HERMITIAN:
1463c10200c1SHong Zhang   case MAT_SUBMAT_SINGLEIS:
14642188ac68SBarry Smith   case MAT_SYMMETRY_ETERNAL:
1465b94d7dedSBarry Smith   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1466b94d7dedSBarry Smith   case MAT_SPD_ETERNAL:
1467b94d7dedSBarry Smith     /* if the diagonal matrix is square it inherits some of the properties above */
146877e54ba9SKris Buschelman     break;
14699371c9d4SSatish Balay   default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
1470d64ed03dSBarry Smith   }
14713a40ed3dSBarry Smith   PetscFunctionReturn(0);
147258667388SSatish Balay }
147358667388SSatish Balay 
14749371c9d4SSatish Balay PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) {
14750ac07820SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
14760ac07820SSatish Balay   Mat_SeqBAIJ *Aloc;
14770ac07820SSatish Balay   Mat          B;
1478d0f46423SBarry Smith   PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
1479d0f46423SBarry Smith   PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
14803eda8832SBarry Smith   MatScalar   *a;
14810ac07820SSatish Balay 
1482d64ed03dSBarry Smith   PetscFunctionBegin;
14837fb60732SBarry Smith   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
1484cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
14859566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
14869566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
14879566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
14882e72b8d9SBarry Smith     /* Do not know preallocation information, but must set block size */
14899566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
1490fc4dec0aSBarry Smith   } else {
1491fc4dec0aSBarry Smith     B = *matout;
1492fc4dec0aSBarry Smith   }
14930ac07820SSatish Balay 
14940ac07820SSatish Balay   /* copy over the A part */
14950ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ *)baij->A->data;
14969371c9d4SSatish Balay   ai   = Aloc->i;
14979371c9d4SSatish Balay   aj   = Aloc->j;
14989371c9d4SSatish Balay   a    = Aloc->a;
14999566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs, &rvals));
15000ac07820SSatish Balay 
15010ac07820SSatish Balay   for (i = 0; i < mbs; i++) {
1502899cda47SBarry Smith     rvals[0] = bs * (baij->rstartbs + i);
150326fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
15040ac07820SSatish Balay     for (j = ai[i]; j < ai[i + 1]; j++) {
1505899cda47SBarry Smith       col = (baij->cstartbs + aj[j]) * bs;
15060ac07820SSatish Balay       for (k = 0; k < bs; k++) {
15079566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
150826fbe8dcSKarl Rupp 
15099371c9d4SSatish Balay         col++;
15109371c9d4SSatish Balay         a += bs;
15110ac07820SSatish Balay       }
15120ac07820SSatish Balay     }
15130ac07820SSatish Balay   }
15140ac07820SSatish Balay   /* copy over the B part */
15150ac07820SSatish Balay   Aloc = (Mat_SeqBAIJ *)baij->B->data;
15169371c9d4SSatish Balay   ai   = Aloc->i;
15179371c9d4SSatish Balay   aj   = Aloc->j;
15189371c9d4SSatish Balay   a    = Aloc->a;
15190ac07820SSatish Balay   for (i = 0; i < mbs; i++) {
1520899cda47SBarry Smith     rvals[0] = bs * (baij->rstartbs + i);
152126fbe8dcSKarl Rupp     for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
15220ac07820SSatish Balay     for (j = ai[i]; j < ai[i + 1]; j++) {
15230ac07820SSatish Balay       col = baij->garray[aj[j]] * bs;
15240ac07820SSatish Balay       for (k = 0; k < bs; k++) {
15259566063dSJacob Faibussowitsch         PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
152626fbe8dcSKarl Rupp         col++;
152726fbe8dcSKarl Rupp         a += bs;
15280ac07820SSatish Balay       }
15290ac07820SSatish Balay     }
15300ac07820SSatish Balay   }
15319566063dSJacob Faibussowitsch   PetscCall(PetscFree(rvals));
15329566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
15339566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
15340ac07820SSatish Balay 
1535cf37664fSBarry Smith   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
153648a46eb9SPierre Jolivet   else PetscCall(MatHeaderMerge(A, &B));
15373a40ed3dSBarry Smith   PetscFunctionReturn(0);
15380ac07820SSatish Balay }
15390e95ebc0SSatish Balay 
15409371c9d4SSatish Balay PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) {
154136c4a09eSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
154236c4a09eSSatish Balay   Mat          a = baij->A, b = baij->B;
1543b24ad042SBarry Smith   PetscInt     s1, s2, s3;
15440e95ebc0SSatish Balay 
1545d64ed03dSBarry Smith   PetscFunctionBegin;
15469566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &s2, &s3));
154736c4a09eSSatish Balay   if (rr) {
15489566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(rr, &s1));
15495f80ce2aSJacob Faibussowitsch     PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
155036c4a09eSSatish Balay     /* Overlap communication with computation. */
15519566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
155236c4a09eSSatish Balay   }
15530e95ebc0SSatish Balay   if (ll) {
15549566063dSJacob Faibussowitsch     PetscCall(VecGetLocalSize(ll, &s1));
15555f80ce2aSJacob Faibussowitsch     PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
1556dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, ll, NULL);
15570e95ebc0SSatish Balay   }
155836c4a09eSSatish Balay   /* scale  the diagonal block */
1559dbbe0bcdSBarry Smith   PetscUseTypeMethod(a, diagonalscale, ll, rr);
156036c4a09eSSatish Balay 
156136c4a09eSSatish Balay   if (rr) {
156236c4a09eSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
15639566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1564dbbe0bcdSBarry Smith     PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
156536c4a09eSSatish Balay   }
15663a40ed3dSBarry Smith   PetscFunctionReturn(0);
15670e95ebc0SSatish Balay }
15680e95ebc0SSatish Balay 
15699371c9d4SSatish Balay PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
15700ac07820SSatish Balay   Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
157165a92638SMatthew G. Knepley   PetscInt    *lrows;
15726e520ac8SStefano Zampini   PetscInt     r, len;
157394342113SStefano Zampini   PetscBool    cong;
15740ac07820SSatish Balay 
1575d64ed03dSBarry Smith   PetscFunctionBegin;
15766e520ac8SStefano Zampini   /* get locally owned rows */
15779566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
157897b48c8fSBarry Smith   /* fix right hand side if needed */
157997b48c8fSBarry Smith   if (x && b) {
158065a92638SMatthew G. Knepley     const PetscScalar *xx;
158165a92638SMatthew G. Knepley     PetscScalar       *bb;
158265a92638SMatthew G. Knepley 
15839566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(x, &xx));
15849566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
158565a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
15869566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(x, &xx));
15879566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
158897b48c8fSBarry Smith   }
158997b48c8fSBarry Smith 
15900ac07820SSatish Balay   /* actually zap the local rows */
159172dacd9aSBarry Smith   /*
159272dacd9aSBarry Smith         Zero the required rows. If the "diagonal block" of the matrix
1593a8c7a070SBarry Smith      is square and the user wishes to set the diagonal we use separate
159472dacd9aSBarry Smith      code so that MatSetValues() is not called for each diagonal allocating
159572dacd9aSBarry Smith      new memory, thus calling lots of mallocs and slowing things down.
159672dacd9aSBarry Smith 
159772dacd9aSBarry Smith   */
15989c957beeSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
15999566063dSJacob Faibussowitsch   PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
16009566063dSJacob Faibussowitsch   PetscCall(MatHasCongruentLayouts(A, &cong));
160194342113SStefano Zampini   if ((diag != 0.0) && cong) {
16029566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
1603f4df32b1SMatthew Knepley   } else if (diag != 0.0) {
16049566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
16055f80ce2aSJacob Faibussowitsch     PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1606512a5fc5SBarry Smith        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
160765a92638SMatthew G. Knepley     for (r = 0; r < len; ++r) {
160865a92638SMatthew G. Knepley       const PetscInt row = lrows[r] + A->rmap->rstart;
16099566063dSJacob Faibussowitsch       PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
1610a07cd24cSSatish Balay     }
16119566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
16129566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
16139c957beeSSatish Balay   } else {
16149566063dSJacob Faibussowitsch     PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
1615a07cd24cSSatish Balay   }
16169566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
16174f9cfa9eSBarry Smith 
16184f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
16194f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
1620e56f5c9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
16211c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
1622e56f5c9eSBarry Smith   }
16233a40ed3dSBarry Smith   PetscFunctionReturn(0);
16240ac07820SSatish Balay }
162572dacd9aSBarry Smith 
16269371c9d4SSatish Balay PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
16276f0a72daSMatthew G. Knepley   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
1628131c27b5Sprj-   PetscMPIInt        n = A->rmap->n, p = 0;
1629131c27b5Sprj-   PetscInt           i, j, k, r, len = 0, row, col, count;
16306f0a72daSMatthew G. Knepley   PetscInt          *lrows, *owners = A->rmap->range;
16316f0a72daSMatthew G. Knepley   PetscSFNode       *rrows;
16326f0a72daSMatthew G. Knepley   PetscSF            sf;
16336f0a72daSMatthew G. Knepley   const PetscScalar *xx;
16346f0a72daSMatthew G. Knepley   PetscScalar       *bb, *mask;
16356f0a72daSMatthew G. Knepley   Vec                xmask, lmask;
16366f0a72daSMatthew G. Knepley   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
16376f0a72daSMatthew G. Knepley   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
16386f0a72daSMatthew G. Knepley   PetscScalar       *aa;
16396f0a72daSMatthew G. Knepley 
16406f0a72daSMatthew G. Knepley   PetscFunctionBegin;
16416f0a72daSMatthew G. Knepley   /* Create SF where leaves are input rows and roots are owned rows */
16429566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lrows));
16436f0a72daSMatthew G. Knepley   for (r = 0; r < n; ++r) lrows[r] = -1;
16449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(N, &rrows));
16456f0a72daSMatthew G. Knepley   for (r = 0; r < N; ++r) {
16466f0a72daSMatthew G. Knepley     const PetscInt idx = rows[r];
16475f80ce2aSJacob Faibussowitsch     PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
16485ba17502SJed Brown     if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
16499566063dSJacob Faibussowitsch       PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
16505ba17502SJed Brown     }
16516f0a72daSMatthew G. Knepley     rrows[r].rank  = p;
16526f0a72daSMatthew G. Knepley     rrows[r].index = rows[r] - owners[p];
16536f0a72daSMatthew G. Knepley   }
16549566063dSJacob Faibussowitsch   PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
16559566063dSJacob Faibussowitsch   PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
16566f0a72daSMatthew G. Knepley   /* Collect flags for rows to be zeroed */
16579566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
16589566063dSJacob Faibussowitsch   PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
16599566063dSJacob Faibussowitsch   PetscCall(PetscSFDestroy(&sf));
16606f0a72daSMatthew G. Knepley   /* Compress and put in row numbers */
16619371c9d4SSatish Balay   for (r = 0; r < n; ++r)
16629371c9d4SSatish Balay     if (lrows[r] >= 0) lrows[len++] = r;
16636f0a72daSMatthew G. Knepley   /* zero diagonal part of matrix */
16649566063dSJacob Faibussowitsch   PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
16656f0a72daSMatthew G. Knepley   /* handle off diagonal part of matrix */
16669566063dSJacob Faibussowitsch   PetscCall(MatCreateVecs(A, &xmask, NULL));
16679566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(l->lvec, &lmask));
16689566063dSJacob Faibussowitsch   PetscCall(VecGetArray(xmask, &bb));
16696f0a72daSMatthew G. Knepley   for (i = 0; i < len; i++) bb[lrows[i]] = 1;
16709566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(xmask, &bb));
16719566063dSJacob Faibussowitsch   PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
16729566063dSJacob Faibussowitsch   PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
16739566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&xmask));
16746f0a72daSMatthew G. Knepley   if (x) {
16759566063dSJacob Faibussowitsch     PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
16769566063dSJacob Faibussowitsch     PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
16779566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(l->lvec, &xx));
16789566063dSJacob Faibussowitsch     PetscCall(VecGetArray(b, &bb));
16796f0a72daSMatthew G. Knepley   }
16809566063dSJacob Faibussowitsch   PetscCall(VecGetArray(lmask, &mask));
16816f0a72daSMatthew G. Knepley   /* remove zeroed rows of off diagonal matrix */
16826f0a72daSMatthew G. Knepley   for (i = 0; i < len; ++i) {
16836f0a72daSMatthew G. Knepley     row   = lrows[i];
16846f0a72daSMatthew G. Knepley     count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
16856f0a72daSMatthew G. Knepley     aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
16866f0a72daSMatthew G. Knepley     for (k = 0; k < count; ++k) {
16876f0a72daSMatthew G. Knepley       aa[0] = 0.0;
16886f0a72daSMatthew G. Knepley       aa += bs;
16896f0a72daSMatthew G. Knepley     }
16906f0a72daSMatthew G. Knepley   }
16916f0a72daSMatthew G. Knepley   /* loop over all elements of off process part of matrix zeroing removed columns*/
16926f0a72daSMatthew G. Knepley   for (i = 0; i < l->B->rmap->N; ++i) {
16936f0a72daSMatthew G. Knepley     row = i / bs;
16946f0a72daSMatthew G. Knepley     for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
16956f0a72daSMatthew G. Knepley       for (k = 0; k < bs; ++k) {
16966f0a72daSMatthew G. Knepley         col = bs * baij->j[j] + k;
16976f0a72daSMatthew G. Knepley         if (PetscAbsScalar(mask[col])) {
16986f0a72daSMatthew G. Knepley           aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
169989ae1891SBarry Smith           if (x) bb[i] -= aa[0] * xx[col];
17006f0a72daSMatthew G. Knepley           aa[0] = 0.0;
17016f0a72daSMatthew G. Knepley         }
17026f0a72daSMatthew G. Knepley       }
17036f0a72daSMatthew G. Knepley     }
17046f0a72daSMatthew G. Knepley   }
17056f0a72daSMatthew G. Knepley   if (x) {
17069566063dSJacob Faibussowitsch     PetscCall(VecRestoreArray(b, &bb));
17079566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(l->lvec, &xx));
17086f0a72daSMatthew G. Knepley   }
17099566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(lmask, &mask));
17109566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&lmask));
17119566063dSJacob Faibussowitsch   PetscCall(PetscFree(lrows));
17124f9cfa9eSBarry Smith 
17134f9cfa9eSBarry Smith   /* only change matrix nonzero state if pattern was allowed to be changed */
17144f9cfa9eSBarry Smith   if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
17154f9cfa9eSBarry Smith     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
17161c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
17174f9cfa9eSBarry Smith   }
17186f0a72daSMatthew G. Knepley   PetscFunctionReturn(0);
17196f0a72daSMatthew G. Knepley }
17206f0a72daSMatthew G. Knepley 
17219371c9d4SSatish Balay PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) {
1722bb5a7306SBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1723d64ed03dSBarry Smith 
1724d64ed03dSBarry Smith   PetscFunctionBegin;
17259566063dSJacob Faibussowitsch   PetscCall(MatSetUnfactored(a->A));
17263a40ed3dSBarry Smith   PetscFunctionReturn(0);
1727bb5a7306SBarry Smith }
1728bb5a7306SBarry Smith 
17296849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);
17300ac07820SSatish Balay 
17319371c9d4SSatish Balay PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) {
17327fc3c18eSBarry Smith   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
17337fc3c18eSBarry Smith   Mat          a, b, c, d;
1734ace3abfcSBarry Smith   PetscBool    flg;
17357fc3c18eSBarry Smith 
17367fc3c18eSBarry Smith   PetscFunctionBegin;
17379371c9d4SSatish Balay   a = matA->A;
17389371c9d4SSatish Balay   b = matA->B;
17399371c9d4SSatish Balay   c = matB->A;
17409371c9d4SSatish Balay   d = matB->B;
17417fc3c18eSBarry Smith 
17429566063dSJacob Faibussowitsch   PetscCall(MatEqual(a, c, &flg));
174348a46eb9SPierre Jolivet   if (flg) PetscCall(MatEqual(b, d, &flg));
17441c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
17457fc3c18eSBarry Smith   PetscFunctionReturn(0);
17467fc3c18eSBarry Smith }
17477fc3c18eSBarry Smith 
17489371c9d4SSatish Balay PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) {
17493c896bc6SHong Zhang   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
17503c896bc6SHong Zhang   Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
17513c896bc6SHong Zhang 
17523c896bc6SHong Zhang   PetscFunctionBegin;
17533c896bc6SHong Zhang   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
17543c896bc6SHong Zhang   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
17559566063dSJacob Faibussowitsch     PetscCall(MatCopy_Basic(A, B, str));
17563c896bc6SHong Zhang   } else {
17579566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->A, b->A, str));
17589566063dSJacob Faibussowitsch     PetscCall(MatCopy(a->B, b->B, str));
17593c896bc6SHong Zhang   }
17609566063dSJacob Faibussowitsch   PetscCall(PetscObjectStateIncrease((PetscObject)B));
17613c896bc6SHong Zhang   PetscFunctionReturn(0);
17623c896bc6SHong Zhang }
1763273d9f13SBarry Smith 
17649371c9d4SSatish Balay PetscErrorCode MatSetUp_MPIBAIJ(Mat A) {
1765273d9f13SBarry Smith   PetscFunctionBegin;
17669566063dSJacob Faibussowitsch   PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
1767273d9f13SBarry Smith   PetscFunctionReturn(0);
1768273d9f13SBarry Smith }
1769273d9f13SBarry Smith 
17709371c9d4SSatish Balay PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
1771001ddc4fSHong Zhang   PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
17724de5dceeSHong Zhang   Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
17734de5dceeSHong Zhang   Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;
17744de5dceeSHong Zhang 
17754de5dceeSHong Zhang   PetscFunctionBegin;
17769566063dSJacob Faibussowitsch   PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
17774de5dceeSHong Zhang   PetscFunctionReturn(0);
17784de5dceeSHong Zhang }
17794de5dceeSHong Zhang 
17809371c9d4SSatish Balay PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
17814fe895cdSHong Zhang   Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
17824fe895cdSHong Zhang   PetscBLASInt bnz, one                         = 1;
17834fe895cdSHong Zhang   Mat_SeqBAIJ *x, *y;
1784b31f67cfSBarry Smith   PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;
17854fe895cdSHong Zhang 
17864fe895cdSHong Zhang   PetscFunctionBegin;
17874fe895cdSHong Zhang   if (str == SAME_NONZERO_PATTERN) {
17884fe895cdSHong Zhang     PetscScalar alpha = a;
17894fe895cdSHong Zhang     x                 = (Mat_SeqBAIJ *)xx->A->data;
17904fe895cdSHong Zhang     y                 = (Mat_SeqBAIJ *)yy->A->data;
17919566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1792792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
17934fe895cdSHong Zhang     x = (Mat_SeqBAIJ *)xx->B->data;
17944fe895cdSHong Zhang     y = (Mat_SeqBAIJ *)yy->B->data;
17959566063dSJacob Faibussowitsch     PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
1796792fecdfSBarry Smith     PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
17979566063dSJacob Faibussowitsch     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1798ab784542SHong Zhang   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
17999566063dSJacob Faibussowitsch     PetscCall(MatAXPY_Basic(Y, a, X, str));
18004fe895cdSHong Zhang   } else {
18014de5dceeSHong Zhang     Mat       B;
18024de5dceeSHong Zhang     PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
18039566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
18049566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
18059566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
18069566063dSJacob Faibussowitsch     PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
18079566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
18089566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
18099566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPIBAIJ));
18109566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
18119566063dSJacob Faibussowitsch     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
18129566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
18134de5dceeSHong Zhang     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
18149566063dSJacob Faibussowitsch     PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
18159566063dSJacob Faibussowitsch     PetscCall(MatHeaderMerge(Y, &B));
18169566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_d));
18179566063dSJacob Faibussowitsch     PetscCall(PetscFree(nnz_o));
18184fe895cdSHong Zhang   }
18194fe895cdSHong Zhang   PetscFunctionReturn(0);
18204fe895cdSHong Zhang }
18214fe895cdSHong Zhang 
18229371c9d4SSatish Balay PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) {
18235f80ce2aSJacob Faibussowitsch   PetscFunctionBegin;
18245f80ce2aSJacob Faibussowitsch   if (PetscDefined(USE_COMPLEX)) {
18252726fb6dSPierre Jolivet     Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
18262726fb6dSPierre Jolivet 
18279566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqBAIJ(a->A));
18289566063dSJacob Faibussowitsch     PetscCall(MatConjugate_SeqBAIJ(a->B));
18295f80ce2aSJacob Faibussowitsch   }
18302726fb6dSPierre Jolivet   PetscFunctionReturn(0);
18312726fb6dSPierre Jolivet }
18322726fb6dSPierre Jolivet 
18339371c9d4SSatish Balay PetscErrorCode MatRealPart_MPIBAIJ(Mat A) {
183499cafbc1SBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
183599cafbc1SBarry Smith 
183699cafbc1SBarry Smith   PetscFunctionBegin;
18379566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->A));
18389566063dSJacob Faibussowitsch   PetscCall(MatRealPart(a->B));
183999cafbc1SBarry Smith   PetscFunctionReturn(0);
184099cafbc1SBarry Smith }
184199cafbc1SBarry Smith 
18429371c9d4SSatish Balay PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) {
184399cafbc1SBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
184499cafbc1SBarry Smith 
184599cafbc1SBarry Smith   PetscFunctionBegin;
18469566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->A));
18479566063dSJacob Faibussowitsch   PetscCall(MatImaginaryPart(a->B));
184899cafbc1SBarry Smith   PetscFunctionReturn(0);
184999cafbc1SBarry Smith }
185099cafbc1SBarry Smith 
18519371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
18524aa3045dSJed Brown   IS       iscol_local;
18534aa3045dSJed Brown   PetscInt csize;
18544aa3045dSJed Brown 
18554aa3045dSJed Brown   PetscFunctionBegin;
18569566063dSJacob Faibussowitsch   PetscCall(ISGetLocalSize(iscol, &csize));
1857b79d0421SJed Brown   if (call == MAT_REUSE_MATRIX) {
18589566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
18595f80ce2aSJacob Faibussowitsch     PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1860b79d0421SJed Brown   } else {
18619566063dSJacob Faibussowitsch     PetscCall(ISAllGather(iscol, &iscol_local));
1862b79d0421SJed Brown   }
18639566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1864b79d0421SJed Brown   if (call == MAT_INITIAL_MATRIX) {
18659566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
18669566063dSJacob Faibussowitsch     PetscCall(ISDestroy(&iscol_local));
1867b79d0421SJed Brown   }
18684aa3045dSJed Brown   PetscFunctionReturn(0);
18694aa3045dSJed Brown }
187017df9f7cSHong Zhang 
187182094794SBarry Smith /*
187282094794SBarry Smith   Not great since it makes two copies of the submatrix, first an SeqBAIJ
187382094794SBarry Smith   in local and then by concatenating the local matrices the end result.
18747dae84e0SHong Zhang   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
18758f46ffcaSHong Zhang   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
187682094794SBarry Smith */
18779371c9d4SSatish Balay PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
187882094794SBarry Smith   PetscMPIInt  rank, size;
187982094794SBarry Smith   PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1880c9ffca76SHong Zhang   PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
188129dcf524SDmitry Karpeev   Mat          M, Mreuse;
188282094794SBarry Smith   MatScalar   *vwork, *aa;
1883ce94432eSBarry Smith   MPI_Comm     comm;
188429dcf524SDmitry Karpeev   IS           isrow_new, iscol_new;
188582094794SBarry Smith   Mat_SeqBAIJ *aij;
188682094794SBarry Smith 
188782094794SBarry Smith   PetscFunctionBegin;
18889566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
18899566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(comm, &rank));
18909566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
18939566063dSJacob Faibussowitsch   PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
18949566063dSJacob Faibussowitsch   PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
189582094794SBarry Smith 
189682094794SBarry Smith   if (call == MAT_REUSE_MATRIX) {
18979566063dSJacob Faibussowitsch     PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
18985f80ce2aSJacob Faibussowitsch     PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
18999566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
190082094794SBarry Smith   } else {
19019566063dSJacob Faibussowitsch     PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
190282094794SBarry Smith   }
19039566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&isrow_new));
19049566063dSJacob Faibussowitsch   PetscCall(ISDestroy(&iscol_new));
190582094794SBarry Smith   /*
190682094794SBarry Smith       m - number of local rows
190782094794SBarry Smith       n - number of columns (same on all processors)
190882094794SBarry Smith       rstart - first row in new global matrix generated
190982094794SBarry Smith   */
19109566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
19119566063dSJacob Faibussowitsch   PetscCall(MatGetSize(Mreuse, &m, &n));
191282094794SBarry Smith   m = m / bs;
191382094794SBarry Smith   n = n / bs;
191482094794SBarry Smith 
191582094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
191682094794SBarry Smith     aij = (Mat_SeqBAIJ *)(Mreuse)->data;
191782094794SBarry Smith     ii  = aij->i;
191882094794SBarry Smith     jj  = aij->j;
191982094794SBarry Smith 
192082094794SBarry Smith     /*
192182094794SBarry Smith         Determine the number of non-zeros in the diagonal and off-diagonal
192282094794SBarry Smith         portions of the matrix in order to do correct preallocation
192382094794SBarry Smith     */
192482094794SBarry Smith 
192582094794SBarry Smith     /* first get start and end of "diagonal" columns */
192682094794SBarry Smith     if (csize == PETSC_DECIDE) {
19279566063dSJacob Faibussowitsch       PetscCall(ISGetSize(isrow, &mglobal));
192882094794SBarry Smith       if (mglobal == n * bs) { /* square matrix */
192982094794SBarry Smith         nlocal = m;
193082094794SBarry Smith       } else {
193182094794SBarry Smith         nlocal = n / size + ((n % size) > rank);
193282094794SBarry Smith       }
193382094794SBarry Smith     } else {
193482094794SBarry Smith       nlocal = csize / bs;
193582094794SBarry Smith     }
19369566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
193782094794SBarry Smith     rstart = rend - nlocal;
1938aed4548fSBarry Smith     PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
193982094794SBarry Smith 
194082094794SBarry Smith     /* next, compute all the lengths */
19419566063dSJacob Faibussowitsch     PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
194282094794SBarry Smith     for (i = 0; i < m; i++) {
194382094794SBarry Smith       jend = ii[i + 1] - ii[i];
194482094794SBarry Smith       olen = 0;
194582094794SBarry Smith       dlen = 0;
194682094794SBarry Smith       for (j = 0; j < jend; j++) {
194782094794SBarry Smith         if (*jj < rstart || *jj >= rend) olen++;
194882094794SBarry Smith         else dlen++;
194982094794SBarry Smith         jj++;
195082094794SBarry Smith       }
195182094794SBarry Smith       olens[i] = olen;
195282094794SBarry Smith       dlens[i] = dlen;
195382094794SBarry Smith     }
19549566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, &M));
19559566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
19569566063dSJacob Faibussowitsch     PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
19579566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
19589566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
19599566063dSJacob Faibussowitsch     PetscCall(PetscFree2(dlens, olens));
196082094794SBarry Smith   } else {
196182094794SBarry Smith     PetscInt ml, nl;
196282094794SBarry Smith 
196382094794SBarry Smith     M = *newmat;
19649566063dSJacob Faibussowitsch     PetscCall(MatGetLocalSize(M, &ml, &nl));
19655f80ce2aSJacob Faibussowitsch     PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
19669566063dSJacob Faibussowitsch     PetscCall(MatZeroEntries(M));
196782094794SBarry Smith     /*
196882094794SBarry Smith          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
196982094794SBarry Smith        rather than the slower MatSetValues().
197082094794SBarry Smith     */
197182094794SBarry Smith     M->was_assembled = PETSC_TRUE;
197282094794SBarry Smith     M->assembled     = PETSC_FALSE;
197382094794SBarry Smith   }
19749566063dSJacob Faibussowitsch   PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
19759566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
197682094794SBarry Smith   aij = (Mat_SeqBAIJ *)(Mreuse)->data;
197782094794SBarry Smith   ii  = aij->i;
197882094794SBarry Smith   jj  = aij->j;
197982094794SBarry Smith   aa  = aij->a;
198082094794SBarry Smith   for (i = 0; i < m; i++) {
198182094794SBarry Smith     row   = rstart / bs + i;
198282094794SBarry Smith     nz    = ii[i + 1] - ii[i];
19839371c9d4SSatish Balay     cwork = jj;
19849371c9d4SSatish Balay     jj += nz;
19859371c9d4SSatish Balay     vwork = aa;
19869371c9d4SSatish Balay     aa += nz * bs * bs;
19879566063dSJacob Faibussowitsch     PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
198882094794SBarry Smith   }
198982094794SBarry Smith 
19909566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
19919566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
199282094794SBarry Smith   *newmat = M;
199382094794SBarry Smith 
199482094794SBarry Smith   /* save submatrix used in processor for next request */
199582094794SBarry Smith   if (call == MAT_INITIAL_MATRIX) {
19969566063dSJacob Faibussowitsch     PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
19979566063dSJacob Faibussowitsch     PetscCall(PetscObjectDereference((PetscObject)Mreuse));
199882094794SBarry Smith   }
199982094794SBarry Smith   PetscFunctionReturn(0);
200082094794SBarry Smith }
200182094794SBarry Smith 
20029371c9d4SSatish Balay PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) {
200382094794SBarry Smith   MPI_Comm        comm, pcomm;
2004a0a83eb5SRémi Lacroix   PetscInt        clocal_size, nrows;
200582094794SBarry Smith   const PetscInt *rows;
2006dbf0e21dSBarry Smith   PetscMPIInt     size;
2007a0a83eb5SRémi Lacroix   IS              crowp, lcolp;
200882094794SBarry Smith 
200982094794SBarry Smith   PetscFunctionBegin;
20109566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
201182094794SBarry Smith   /* make a collective version of 'rowp' */
20129566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
201382094794SBarry Smith   if (pcomm == comm) {
201482094794SBarry Smith     crowp = rowp;
201582094794SBarry Smith   } else {
20169566063dSJacob Faibussowitsch     PetscCall(ISGetSize(rowp, &nrows));
20179566063dSJacob Faibussowitsch     PetscCall(ISGetIndices(rowp, &rows));
20189566063dSJacob Faibussowitsch     PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
20199566063dSJacob Faibussowitsch     PetscCall(ISRestoreIndices(rowp, &rows));
202082094794SBarry Smith   }
20219566063dSJacob Faibussowitsch   PetscCall(ISSetPermutation(crowp));
2022a0a83eb5SRémi Lacroix   /* make a local version of 'colp' */
20239566063dSJacob Faibussowitsch   PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
20249566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(pcomm, &size));
2025dbf0e21dSBarry Smith   if (size == 1) {
202682094794SBarry Smith     lcolp = colp;
202782094794SBarry Smith   } else {
20289566063dSJacob Faibussowitsch     PetscCall(ISAllGather(colp, &lcolp));
202982094794SBarry Smith   }
20309566063dSJacob Faibussowitsch   PetscCall(ISSetPermutation(lcolp));
203175f6568bSJed Brown   /* now we just get the submatrix */
20329566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
20339566063dSJacob Faibussowitsch   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
2034a0a83eb5SRémi Lacroix   /* clean up */
203548a46eb9SPierre Jolivet   if (pcomm != comm) PetscCall(ISDestroy(&crowp));
203648a46eb9SPierre Jolivet   if (size > 1) PetscCall(ISDestroy(&lcolp));
203782094794SBarry Smith   PetscFunctionReturn(0);
203882094794SBarry Smith }
203982094794SBarry Smith 
20409371c9d4SSatish Balay PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
20418c7482ecSBarry Smith   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
20428c7482ecSBarry Smith   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
20438c7482ecSBarry Smith 
20448c7482ecSBarry Smith   PetscFunctionBegin;
204526fbe8dcSKarl Rupp   if (nghosts) *nghosts = B->nbs;
204626fbe8dcSKarl Rupp   if (ghosts) *ghosts = baij->garray;
20478c7482ecSBarry Smith   PetscFunctionReturn(0);
20488c7482ecSBarry Smith }
20498c7482ecSBarry Smith 
20509371c9d4SSatish Balay PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) {
2051f6d58c54SBarry Smith   Mat          B;
2052f6d58c54SBarry Smith   Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
2053f6d58c54SBarry Smith   Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
2054f6d58c54SBarry Smith   Mat_SeqAIJ  *b;
2055f4259b30SLisandro Dalcin   PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
2056f6d58c54SBarry Smith   PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
2057f6d58c54SBarry Smith   PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;
2058f6d58c54SBarry Smith 
2059f6d58c54SBarry Smith   PetscFunctionBegin;
20609566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
20619566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2062f6d58c54SBarry Smith 
2063f6d58c54SBarry Smith   /* ----------------------------------------------------------------
2064f6d58c54SBarry Smith      Tell every processor the number of nonzeros per row
2065f6d58c54SBarry Smith   */
20669566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
2067ad540459SPierre Jolivet   for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
20689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(2 * size, &recvcounts));
2069f6d58c54SBarry Smith   displs = recvcounts + size;
2070f6d58c54SBarry Smith   for (i = 0; i < size; i++) {
2071f6d58c54SBarry Smith     recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
2072f6d58c54SBarry Smith     displs[i]     = A->rmap->range[i] / bs;
2073f6d58c54SBarry Smith   }
20749566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2075f6d58c54SBarry Smith   /* ---------------------------------------------------------------
2076f6d58c54SBarry Smith      Create the sequential matrix of the same type as the local block diagonal
2077f6d58c54SBarry Smith   */
20789566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &B));
20799566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
20809566063dSJacob Faibussowitsch   PetscCall(MatSetType(B, MATSEQAIJ));
20819566063dSJacob Faibussowitsch   PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
2082f6d58c54SBarry Smith   b = (Mat_SeqAIJ *)B->data;
2083f6d58c54SBarry Smith 
2084f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2085f6d58c54SBarry Smith     Copy my part of matrix column indices over
2086f6d58c54SBarry Smith   */
2087f6d58c54SBarry Smith   sendcount  = ad->nz + bd->nz;
2088f6d58c54SBarry Smith   jsendbuf   = b->j + b->i[rstarts[rank] / bs];
2089f6d58c54SBarry Smith   a_jsendbuf = ad->j;
2090f6d58c54SBarry Smith   b_jsendbuf = bd->j;
2091f6d58c54SBarry Smith   n          = A->rmap->rend / bs - A->rmap->rstart / bs;
2092f6d58c54SBarry Smith   cnt        = 0;
2093f6d58c54SBarry Smith   for (i = 0; i < n; i++) {
2094f6d58c54SBarry Smith     /* put in lower diagonal portion */
2095f6d58c54SBarry Smith     m = bd->i[i + 1] - bd->i[i];
2096f6d58c54SBarry Smith     while (m > 0) {
2097f6d58c54SBarry Smith       /* is it above diagonal (in bd (compressed) numbering) */
2098f6d58c54SBarry Smith       if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
2099f6d58c54SBarry Smith       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2100f6d58c54SBarry Smith       m--;
2101f6d58c54SBarry Smith     }
2102f6d58c54SBarry Smith 
2103f6d58c54SBarry Smith     /* put in diagonal portion */
2104ad540459SPierre Jolivet     for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;
2105f6d58c54SBarry Smith 
2106f6d58c54SBarry Smith     /* put in upper diagonal portion */
2107ad540459SPierre Jolivet     while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
2108f6d58c54SBarry Smith   }
21095f80ce2aSJacob Faibussowitsch   PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);
2110f6d58c54SBarry Smith 
2111f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2112f6d58c54SBarry Smith     Gather all column indices to all processors
2113f6d58c54SBarry Smith   */
2114f6d58c54SBarry Smith   for (i = 0; i < size; i++) {
2115f6d58c54SBarry Smith     recvcounts[i] = 0;
2116ad540459SPierre Jolivet     for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
2117f6d58c54SBarry Smith   }
2118f6d58c54SBarry Smith   displs[0] = 0;
2119ad540459SPierre Jolivet   for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
21209566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
2121f6d58c54SBarry Smith   /*--------------------------------------------------------------------
2122f6d58c54SBarry Smith     Assemble the matrix into useable form (note numerical values not yet set)
2123f6d58c54SBarry Smith   */
2124f6d58c54SBarry Smith   /* set the b->ilen (length of each row) values */
21259566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
2126f6d58c54SBarry Smith   /* set the b->i indices */
2127f6d58c54SBarry Smith   b->i[0] = 0;
2128ad540459SPierre Jolivet   for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
21299566063dSJacob Faibussowitsch   PetscCall(PetscFree(lens));
21309566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
21319566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
21329566063dSJacob Faibussowitsch   PetscCall(PetscFree(recvcounts));
2133f6d58c54SBarry Smith 
2134b94d7dedSBarry Smith   PetscCall(MatPropagateSymmetryOptions(A, B));
2135f6d58c54SBarry Smith   *newmat = B;
2136f6d58c54SBarry Smith   PetscFunctionReturn(0);
2137f6d58c54SBarry Smith }
2138f6d58c54SBarry Smith 
21399371c9d4SSatish Balay PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
2140b1a666ecSBarry Smith   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
2141f4259b30SLisandro Dalcin   Vec          bb1 = NULL;
2142b1a666ecSBarry Smith 
2143b1a666ecSBarry Smith   PetscFunctionBegin;
2144b1a666ecSBarry Smith   if (flag == SOR_APPLY_UPPER) {
21459566063dSJacob Faibussowitsch     PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2146b1a666ecSBarry Smith     PetscFunctionReturn(0);
2147b1a666ecSBarry Smith   }
2148b1a666ecSBarry Smith 
214948a46eb9SPierre Jolivet   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));
21504e980039SJed Brown 
2151b1a666ecSBarry Smith   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2152b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
21539566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2154b1a666ecSBarry Smith       its--;
2155b1a666ecSBarry Smith     }
2156b1a666ecSBarry Smith 
2157b1a666ecSBarry Smith     while (its--) {
21589566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
21599566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2160b1a666ecSBarry Smith 
2161b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
21629566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
21639566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2164b1a666ecSBarry Smith 
2165b1a666ecSBarry Smith       /* local sweep */
21669566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
2167b1a666ecSBarry Smith     }
2168b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2169b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
21709566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2171b1a666ecSBarry Smith       its--;
2172b1a666ecSBarry Smith     }
2173b1a666ecSBarry Smith     while (its--) {
21749566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
21759566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2176b1a666ecSBarry Smith 
2177b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
21789566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
21799566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2180b1a666ecSBarry Smith 
2181b1a666ecSBarry Smith       /* local sweep */
21829566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
2183b1a666ecSBarry Smith     }
2184b1a666ecSBarry Smith   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2185b1a666ecSBarry Smith     if (flag & SOR_ZERO_INITIAL_GUESS) {
21869566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2187b1a666ecSBarry Smith       its--;
2188b1a666ecSBarry Smith     }
2189b1a666ecSBarry Smith     while (its--) {
21909566063dSJacob Faibussowitsch       PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
21919566063dSJacob Faibussowitsch       PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
2192b1a666ecSBarry Smith 
2193b1a666ecSBarry Smith       /* update rhs: bb1 = bb - B*x */
21949566063dSJacob Faibussowitsch       PetscCall(VecScale(mat->lvec, -1.0));
21959566063dSJacob Faibussowitsch       PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));
2196b1a666ecSBarry Smith 
2197b1a666ecSBarry Smith       /* local sweep */
21989566063dSJacob Faibussowitsch       PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
2199b1a666ecSBarry Smith     }
2200ce94432eSBarry Smith   } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");
2201b1a666ecSBarry Smith 
22029566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&bb1));
2203b1a666ecSBarry Smith   PetscFunctionReturn(0);
2204b1a666ecSBarry Smith }
2205b1a666ecSBarry Smith 
22069371c9d4SSatish Balay PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) {
220747f7623dSRémi Lacroix   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
2208a873a8cdSSam Reynolds   PetscInt     m, N, i, *garray = aij->garray;
220947f7623dSRémi Lacroix   PetscInt     ib, jb, bs = A->rmap->bs;
221047f7623dSRémi Lacroix   Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
221147f7623dSRémi Lacroix   MatScalar   *a_val = a_aij->a;
221247f7623dSRémi Lacroix   Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
221347f7623dSRémi Lacroix   MatScalar   *b_val = b_aij->a;
221447f7623dSRémi Lacroix   PetscReal   *work;
221547f7623dSRémi Lacroix 
221647f7623dSRémi Lacroix   PetscFunctionBegin;
22179566063dSJacob Faibussowitsch   PetscCall(MatGetSize(A, &m, &N));
22189566063dSJacob Faibussowitsch   PetscCall(PetscCalloc1(N, &work));
2219857cbf51SRichard Tran Mills   if (type == NORM_2) {
222047f7623dSRémi Lacroix     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
222147f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
222247f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
222347f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
222447f7623dSRémi Lacroix           a_val++;
222547f7623dSRémi Lacroix         }
222647f7623dSRémi Lacroix       }
222747f7623dSRémi Lacroix     }
222847f7623dSRémi Lacroix     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
222947f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
223047f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
223147f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
223247f7623dSRémi Lacroix           b_val++;
223347f7623dSRémi Lacroix         }
223447f7623dSRémi Lacroix       }
223547f7623dSRémi Lacroix     }
2236857cbf51SRichard Tran Mills   } else if (type == NORM_1) {
223747f7623dSRémi Lacroix     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
223847f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
223947f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
224047f7623dSRémi Lacroix           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
224147f7623dSRémi Lacroix           a_val++;
224247f7623dSRémi Lacroix         }
224347f7623dSRémi Lacroix       }
224447f7623dSRémi Lacroix     }
224547f7623dSRémi Lacroix     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
224647f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
224747f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
224847f7623dSRémi Lacroix           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
224947f7623dSRémi Lacroix           b_val++;
225047f7623dSRémi Lacroix         }
225147f7623dSRémi Lacroix       }
225247f7623dSRémi Lacroix     }
2253857cbf51SRichard Tran Mills   } else if (type == NORM_INFINITY) {
225447f7623dSRémi Lacroix     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
225547f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
225647f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
225747f7623dSRémi Lacroix           int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
225847f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
225947f7623dSRémi Lacroix           a_val++;
226047f7623dSRémi Lacroix         }
226147f7623dSRémi Lacroix       }
226247f7623dSRémi Lacroix     }
226347f7623dSRémi Lacroix     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
226447f7623dSRémi Lacroix       for (jb = 0; jb < bs; jb++) {
226547f7623dSRémi Lacroix         for (ib = 0; ib < bs; ib++) {
226647f7623dSRémi Lacroix           int col   = garray[b_aij->j[i]] * bs + jb;
226747f7623dSRémi Lacroix           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
226847f7623dSRémi Lacroix           b_val++;
226947f7623dSRémi Lacroix         }
227047f7623dSRémi Lacroix       }
227147f7623dSRémi Lacroix     }
2272857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2273a873a8cdSSam Reynolds     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2274a873a8cdSSam Reynolds       for (jb = 0; jb < bs; jb++) {
2275a873a8cdSSam Reynolds         for (ib = 0; ib < bs; ib++) {
2276857cbf51SRichard Tran Mills           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2277a873a8cdSSam Reynolds           a_val++;
2278a873a8cdSSam Reynolds         }
2279a873a8cdSSam Reynolds       }
2280a873a8cdSSam Reynolds     }
2281a873a8cdSSam Reynolds     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2282a873a8cdSSam Reynolds       for (jb = 0; jb < bs; jb++) {
2283a873a8cdSSam Reynolds         for (ib = 0; ib < bs; ib++) {
2284857cbf51SRichard Tran Mills           work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2285a873a8cdSSam Reynolds           b_val++;
2286a873a8cdSSam Reynolds         }
2287a873a8cdSSam Reynolds       }
2288a873a8cdSSam Reynolds     }
2289857cbf51SRichard Tran Mills   } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2290857cbf51SRichard Tran Mills     for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2291857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
2292857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
2293857cbf51SRichard Tran Mills           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2294857cbf51SRichard Tran Mills           a_val++;
2295857cbf51SRichard Tran Mills         }
2296857cbf51SRichard Tran Mills       }
2297857cbf51SRichard Tran Mills     }
2298857cbf51SRichard Tran Mills     for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2299857cbf51SRichard Tran Mills       for (jb = 0; jb < bs; jb++) {
2300857cbf51SRichard Tran Mills         for (ib = 0; ib < bs; ib++) {
2301857cbf51SRichard Tran Mills           work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2302857cbf51SRichard Tran Mills           b_val++;
2303857cbf51SRichard Tran Mills         }
2304857cbf51SRichard Tran Mills       }
2305857cbf51SRichard Tran Mills     }
2306857cbf51SRichard Tran Mills   } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2307857cbf51SRichard Tran Mills   if (type == NORM_INFINITY) {
23081c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
230947f7623dSRémi Lacroix   } else {
23101c2dc1cbSBarry Smith     PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
231147f7623dSRémi Lacroix   }
23129566063dSJacob Faibussowitsch   PetscCall(PetscFree(work));
2313857cbf51SRichard Tran Mills   if (type == NORM_2) {
2314a873a8cdSSam Reynolds     for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2315857cbf51SRichard Tran Mills   } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2316a873a8cdSSam Reynolds     for (i = 0; i < N; i++) reductions[i] /= m;
231747f7623dSRémi Lacroix   }
231847f7623dSRémi Lacroix   PetscFunctionReturn(0);
231947f7623dSRémi Lacroix }
232047f7623dSRémi Lacroix 
23219371c9d4SSatish Balay PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) {
2322bbead8a2SBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2323bbead8a2SBarry Smith 
2324bbead8a2SBarry Smith   PetscFunctionBegin;
23259566063dSJacob Faibussowitsch   PetscCall(MatInvertBlockDiagonal(a->A, values));
23267b6c816cSBarry Smith   A->factorerrortype             = a->A->factorerrortype;
23277b6c816cSBarry Smith   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
23287b6c816cSBarry Smith   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2329bbead8a2SBarry Smith   PetscFunctionReturn(0);
2330bbead8a2SBarry Smith }
2331bbead8a2SBarry Smith 
23329371c9d4SSatish Balay PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) {
23337d68702bSBarry Smith   Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
23346f33a894SBarry Smith   Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;
23357d68702bSBarry Smith 
23367d68702bSBarry Smith   PetscFunctionBegin;
23376f33a894SBarry Smith   if (!Y->preallocated) {
23389566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
23396f33a894SBarry Smith   } else if (!aij->nz) {
2340b83222d8SBarry Smith     PetscInt nonew = aij->nonew;
23419566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
2342b83222d8SBarry Smith     aij->nonew = nonew;
23437d68702bSBarry Smith   }
23449566063dSJacob Faibussowitsch   PetscCall(MatShift_Basic(Y, a));
23457d68702bSBarry Smith   PetscFunctionReturn(0);
23467d68702bSBarry Smith }
23478c7482ecSBarry Smith 
23489371c9d4SSatish Balay PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) {
23493b49f96aSBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
23503b49f96aSBarry Smith 
23513b49f96aSBarry Smith   PetscFunctionBegin;
23525f80ce2aSJacob Faibussowitsch   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
23539566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(a->A, missing, d));
23543b49f96aSBarry Smith   if (d) {
23553b49f96aSBarry Smith     PetscInt rstart;
23569566063dSJacob Faibussowitsch     PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
23573b49f96aSBarry Smith     *d += rstart / A->rmap->bs;
23583b49f96aSBarry Smith   }
23593b49f96aSBarry Smith   PetscFunctionReturn(0);
23603b49f96aSBarry Smith }
23613b49f96aSBarry Smith 
23629371c9d4SSatish Balay PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) {
2363a5b7ff6bSBarry Smith   PetscFunctionBegin;
2364a5b7ff6bSBarry Smith   *a = ((Mat_MPIBAIJ *)A->data)->A;
2365a5b7ff6bSBarry Smith   PetscFunctionReturn(0);
2366a5b7ff6bSBarry Smith }
2367a5b7ff6bSBarry Smith 
236879bdfe76SSatish Balay /* -------------------------------------------------------------------*/
23693964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2370cc2dc46cSBarry Smith                                        MatGetRow_MPIBAIJ,
2371cc2dc46cSBarry Smith                                        MatRestoreRow_MPIBAIJ,
2372cc2dc46cSBarry Smith                                        MatMult_MPIBAIJ,
237397304618SKris Buschelman                                        /* 4*/ MatMultAdd_MPIBAIJ,
23747c922b88SBarry Smith                                        MatMultTranspose_MPIBAIJ,
23757c922b88SBarry Smith                                        MatMultTransposeAdd_MPIBAIJ,
2376f4259b30SLisandro Dalcin                                        NULL,
2377f4259b30SLisandro Dalcin                                        NULL,
2378f4259b30SLisandro Dalcin                                        NULL,
2379f4259b30SLisandro Dalcin                                        /*10*/ NULL,
2380f4259b30SLisandro Dalcin                                        NULL,
2381f4259b30SLisandro Dalcin                                        NULL,
2382b1a666ecSBarry Smith                                        MatSOR_MPIBAIJ,
2383cc2dc46cSBarry Smith                                        MatTranspose_MPIBAIJ,
238497304618SKris Buschelman                                        /*15*/ MatGetInfo_MPIBAIJ,
23857fc3c18eSBarry Smith                                        MatEqual_MPIBAIJ,
2386cc2dc46cSBarry Smith                                        MatGetDiagonal_MPIBAIJ,
2387cc2dc46cSBarry Smith                                        MatDiagonalScale_MPIBAIJ,
2388cc2dc46cSBarry Smith                                        MatNorm_MPIBAIJ,
238997304618SKris Buschelman                                        /*20*/ MatAssemblyBegin_MPIBAIJ,
2390cc2dc46cSBarry Smith                                        MatAssemblyEnd_MPIBAIJ,
2391cc2dc46cSBarry Smith                                        MatSetOption_MPIBAIJ,
2392cc2dc46cSBarry Smith                                        MatZeroEntries_MPIBAIJ,
2393d519adbfSMatthew Knepley                                        /*24*/ MatZeroRows_MPIBAIJ,
2394f4259b30SLisandro Dalcin                                        NULL,
2395f4259b30SLisandro Dalcin                                        NULL,
2396f4259b30SLisandro Dalcin                                        NULL,
2397f4259b30SLisandro Dalcin                                        NULL,
23984994cf47SJed Brown                                        /*29*/ MatSetUp_MPIBAIJ,
2399f4259b30SLisandro Dalcin                                        NULL,
2400f4259b30SLisandro Dalcin                                        NULL,
2401a5b7ff6bSBarry Smith                                        MatGetDiagonalBlock_MPIBAIJ,
2402f4259b30SLisandro Dalcin                                        NULL,
2403d519adbfSMatthew Knepley                                        /*34*/ MatDuplicate_MPIBAIJ,
2404f4259b30SLisandro Dalcin                                        NULL,
2405f4259b30SLisandro Dalcin                                        NULL,
2406f4259b30SLisandro Dalcin                                        NULL,
2407f4259b30SLisandro Dalcin                                        NULL,
2408d519adbfSMatthew Knepley                                        /*39*/ MatAXPY_MPIBAIJ,
24097dae84e0SHong Zhang                                        MatCreateSubMatrices_MPIBAIJ,
2410cc2dc46cSBarry Smith                                        MatIncreaseOverlap_MPIBAIJ,
2411cc2dc46cSBarry Smith                                        MatGetValues_MPIBAIJ,
24123c896bc6SHong Zhang                                        MatCopy_MPIBAIJ,
2413f4259b30SLisandro Dalcin                                        /*44*/ NULL,
2414cc2dc46cSBarry Smith                                        MatScale_MPIBAIJ,
24157d68702bSBarry Smith                                        MatShift_MPIBAIJ,
2416f4259b30SLisandro Dalcin                                        NULL,
24176f0a72daSMatthew G. Knepley                                        MatZeroRowsColumns_MPIBAIJ,
2418f4259b30SLisandro Dalcin                                        /*49*/ NULL,
2419f4259b30SLisandro Dalcin                                        NULL,
2420f4259b30SLisandro Dalcin                                        NULL,
2421f4259b30SLisandro Dalcin                                        NULL,
2422f4259b30SLisandro Dalcin                                        NULL,
242393dfae19SHong Zhang                                        /*54*/ MatFDColoringCreate_MPIXAIJ,
2424f4259b30SLisandro Dalcin                                        NULL,
2425cc2dc46cSBarry Smith                                        MatSetUnfactored_MPIBAIJ,
242682094794SBarry Smith                                        MatPermute_MPIBAIJ,
2427cc2dc46cSBarry Smith                                        MatSetValuesBlocked_MPIBAIJ,
24287dae84e0SHong Zhang                                        /*59*/ MatCreateSubMatrix_MPIBAIJ,
2429f14a1c24SBarry Smith                                        MatDestroy_MPIBAIJ,
2430f14a1c24SBarry Smith                                        MatView_MPIBAIJ,
2431f4259b30SLisandro Dalcin                                        NULL,
2432f4259b30SLisandro Dalcin                                        NULL,
2433f4259b30SLisandro Dalcin                                        /*64*/ NULL,
2434f4259b30SLisandro Dalcin                                        NULL,
2435f4259b30SLisandro Dalcin                                        NULL,
2436f4259b30SLisandro Dalcin                                        NULL,
2437f4259b30SLisandro Dalcin                                        NULL,
2438d519adbfSMatthew Knepley                                        /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2439f4259b30SLisandro Dalcin                                        NULL,
2440f4259b30SLisandro Dalcin                                        NULL,
2441f4259b30SLisandro Dalcin                                        NULL,
2442f4259b30SLisandro Dalcin                                        NULL,
2443f4259b30SLisandro Dalcin                                        /*74*/ NULL,
2444f6d58c54SBarry Smith                                        MatFDColoringApply_BAIJ,
2445f4259b30SLisandro Dalcin                                        NULL,
2446f4259b30SLisandro Dalcin                                        NULL,
2447f4259b30SLisandro Dalcin                                        NULL,
2448f4259b30SLisandro Dalcin                                        /*79*/ NULL,
2449f4259b30SLisandro Dalcin                                        NULL,
2450f4259b30SLisandro Dalcin                                        NULL,
2451f4259b30SLisandro Dalcin                                        NULL,
24525bba2384SShri Abhyankar                                        MatLoad_MPIBAIJ,
2453f4259b30SLisandro Dalcin                                        /*84*/ NULL,
2454f4259b30SLisandro Dalcin                                        NULL,
2455f4259b30SLisandro Dalcin                                        NULL,
2456f4259b30SLisandro Dalcin                                        NULL,
2457f4259b30SLisandro Dalcin                                        NULL,
2458f4259b30SLisandro Dalcin                                        /*89*/ NULL,
2459f4259b30SLisandro Dalcin                                        NULL,
2460f4259b30SLisandro Dalcin                                        NULL,
2461f4259b30SLisandro Dalcin                                        NULL,
2462f4259b30SLisandro Dalcin                                        NULL,
2463f4259b30SLisandro Dalcin                                        /*94*/ NULL,
2464f4259b30SLisandro Dalcin                                        NULL,
2465f4259b30SLisandro Dalcin                                        NULL,
2466f4259b30SLisandro Dalcin                                        NULL,
2467f4259b30SLisandro Dalcin                                        NULL,
2468f4259b30SLisandro Dalcin                                        /*99*/ NULL,
2469f4259b30SLisandro Dalcin                                        NULL,
2470f4259b30SLisandro Dalcin                                        NULL,
24712726fb6dSPierre Jolivet                                        MatConjugate_MPIBAIJ,
2472f4259b30SLisandro Dalcin                                        NULL,
2473f4259b30SLisandro Dalcin                                        /*104*/ NULL,
247499cafbc1SBarry Smith                                        MatRealPart_MPIBAIJ,
24758c7482ecSBarry Smith                                        MatImaginaryPart_MPIBAIJ,
2476f4259b30SLisandro Dalcin                                        NULL,
2477f4259b30SLisandro Dalcin                                        NULL,
2478f4259b30SLisandro Dalcin                                        /*109*/ NULL,
2479f4259b30SLisandro Dalcin                                        NULL,
2480f4259b30SLisandro Dalcin                                        NULL,
2481f4259b30SLisandro Dalcin                                        NULL,
24823b49f96aSBarry Smith                                        MatMissingDiagonal_MPIBAIJ,
2483d1adec66SJed Brown                                        /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
2484f4259b30SLisandro Dalcin                                        NULL,
24854683f7a4SShri Abhyankar                                        MatGetGhosts_MPIBAIJ,
2486f4259b30SLisandro Dalcin                                        NULL,
2487f4259b30SLisandro Dalcin                                        NULL,
2488f4259b30SLisandro Dalcin                                        /*119*/ NULL,
2489f4259b30SLisandro Dalcin                                        NULL,
2490f4259b30SLisandro Dalcin                                        NULL,
2491f4259b30SLisandro Dalcin                                        NULL,
2492e8271787SHong Zhang                                        MatGetMultiProcBlock_MPIBAIJ,
2493f4259b30SLisandro Dalcin                                        /*124*/ NULL,
2494a873a8cdSSam Reynolds                                        MatGetColumnReductions_MPIBAIJ,
24953964eb88SJed Brown                                        MatInvertBlockDiagonal_MPIBAIJ,
2496f4259b30SLisandro Dalcin                                        NULL,
2497f4259b30SLisandro Dalcin                                        NULL,
2498f4259b30SLisandro Dalcin                                        /*129*/ NULL,
2499f4259b30SLisandro Dalcin                                        NULL,
2500f4259b30SLisandro Dalcin                                        NULL,
2501f4259b30SLisandro Dalcin                                        NULL,
2502f4259b30SLisandro Dalcin                                        NULL,
2503f4259b30SLisandro Dalcin                                        /*134*/ NULL,
2504f4259b30SLisandro Dalcin                                        NULL,
2505f4259b30SLisandro Dalcin                                        NULL,
2506f4259b30SLisandro Dalcin                                        NULL,
2507f4259b30SLisandro Dalcin                                        NULL,
250846533700Sstefano_zampini                                        /*139*/ MatSetBlockSizes_Default,
2509f4259b30SLisandro Dalcin                                        NULL,
2510f4259b30SLisandro Dalcin                                        NULL,
2511bdf6f3fcSHong Zhang                                        MatFDColoringSetUp_MPIXAIJ,
2512f4259b30SLisandro Dalcin                                        NULL,
2513d70f29a3SPierre Jolivet                                        /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
2514d70f29a3SPierre Jolivet                                        NULL,
2515d70f29a3SPierre Jolivet                                        NULL,
251699a7f59eSMark Adams                                        NULL,
251799a7f59eSMark Adams                                        NULL,
25187fb60732SBarry Smith                                        NULL,
25199371c9d4SSatish Balay                                        /*150*/ NULL};
252079bdfe76SSatish Balay 
2521cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
2522c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
2523d94109b8SHong Zhang 
25249371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) {
2525b8d659d7SLisandro Dalcin   PetscInt        m, rstart, cstart, cend;
2526f4259b30SLisandro Dalcin   PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
2527f4259b30SLisandro Dalcin   const PetscInt *JJ          = NULL;
2528f4259b30SLisandro Dalcin   PetscScalar    *values      = NULL;
2529d47bf9aaSJed Brown   PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
25303bd0feecSPierre Jolivet   PetscBool       nooffprocentries;
2531aac34f13SBarry Smith 
2532aac34f13SBarry Smith   PetscFunctionBegin;
25339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
25349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
25359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
25369566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
25379566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2538d0f46423SBarry Smith   m      = B->rmap->n / bs;
2539d0f46423SBarry Smith   rstart = B->rmap->rstart / bs;
2540d0f46423SBarry Smith   cstart = B->cmap->rstart / bs;
2541d0f46423SBarry Smith   cend   = B->cmap->rend / bs;
2542b8d659d7SLisandro Dalcin 
25435f80ce2aSJacob Faibussowitsch   PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
25449566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2545aac34f13SBarry Smith   for (i = 0; i < m; i++) {
2546cf12db73SBarry Smith     nz = ii[i + 1] - ii[i];
25475f80ce2aSJacob Faibussowitsch     PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2548b8d659d7SLisandro Dalcin     nz_max = PetscMax(nz_max, nz);
254937cd3c0dSBarry Smith     dlen   = 0;
255037cd3c0dSBarry Smith     olen   = 0;
2551cf12db73SBarry Smith     JJ     = jj + ii[i];
2552b8d659d7SLisandro Dalcin     for (j = 0; j < nz; j++) {
255337cd3c0dSBarry Smith       if (*JJ < cstart || *JJ >= cend) olen++;
255437cd3c0dSBarry Smith       else dlen++;
2555aac34f13SBarry Smith       JJ++;
2556aac34f13SBarry Smith     }
255737cd3c0dSBarry Smith     d_nnz[i] = dlen;
255837cd3c0dSBarry Smith     o_nnz[i] = olen;
2559aac34f13SBarry Smith   }
25609566063dSJacob Faibussowitsch   PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
25619566063dSJacob Faibussowitsch   PetscCall(PetscFree2(d_nnz, o_nnz));
2562aac34f13SBarry Smith 
2563b8d659d7SLisandro Dalcin   values = (PetscScalar *)V;
256448a46eb9SPierre Jolivet   if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2565b8d659d7SLisandro Dalcin   for (i = 0; i < m; i++) {
2566b8d659d7SLisandro Dalcin     PetscInt        row   = i + rstart;
2567cf12db73SBarry Smith     PetscInt        ncols = ii[i + 1] - ii[i];
2568cf12db73SBarry Smith     const PetscInt *icols = jj + ii[i];
2569bb80cfbbSStefano Zampini     if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2570cf12db73SBarry Smith       const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
25719566063dSJacob Faibussowitsch       PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
25723adadaf3SJed Brown     } else { /* block ordering does not match so we can only insert one block at a time. */
25733adadaf3SJed Brown       PetscInt j;
25743adadaf3SJed Brown       for (j = 0; j < ncols; j++) {
25753adadaf3SJed Brown         const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
25769566063dSJacob Faibussowitsch         PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
25773adadaf3SJed Brown       }
25783adadaf3SJed Brown     }
2579aac34f13SBarry Smith   }
2580aac34f13SBarry Smith 
25819566063dSJacob Faibussowitsch   if (!V) PetscCall(PetscFree(values));
25823bd0feecSPierre Jolivet   nooffprocentries    = B->nooffprocentries;
25833bd0feecSPierre Jolivet   B->nooffprocentries = PETSC_TRUE;
25849566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
25859566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
25863bd0feecSPierre Jolivet   B->nooffprocentries = nooffprocentries;
25873bd0feecSPierre Jolivet 
25889566063dSJacob Faibussowitsch   PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2589aac34f13SBarry Smith   PetscFunctionReturn(0);
2590aac34f13SBarry Smith }
2591aac34f13SBarry Smith 
2592aac34f13SBarry Smith /*@C
259311a5261eSBarry Smith    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2594aac34f13SBarry Smith 
2595d083f849SBarry Smith    Collective
2596aac34f13SBarry Smith 
2597aac34f13SBarry Smith    Input Parameters:
25981c4f3114SJed Brown +  B - the matrix
2599dfb205c3SBarry Smith .  bs - the block size
2600aac34f13SBarry Smith .  i - the indices into j for the start of each local row (starts with zero)
2601aac34f13SBarry Smith .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2602aac34f13SBarry Smith -  v - optional values in the matrix
2603aac34f13SBarry Smith 
2604664954b6SBarry Smith    Level: advanced
2605aac34f13SBarry Smith 
260695452b02SPatrick Sanan    Notes:
260711a5261eSBarry Smith     The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`.  For example, C programs
260811a5261eSBarry Smith    may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
26093adadaf3SJed Brown    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
261011a5261eSBarry Smith    `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
26113adadaf3SJed Brown    block column and the second index is over columns within a block.
26123adadaf3SJed Brown 
2613664954b6SBarry Smith    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2614664954b6SBarry Smith 
2615db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
2616aac34f13SBarry Smith @*/
26179371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
2618aac34f13SBarry Smith   PetscFunctionBegin;
26196ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
26206ba663aaSJed Brown   PetscValidType(B, 1);
26216ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2622cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2623aac34f13SBarry Smith   PetscFunctionReturn(0);
2624aac34f13SBarry Smith }
2625aac34f13SBarry Smith 
26269371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) {
2627a23d5eceSKris Buschelman   Mat_MPIBAIJ *b;
2628535b19f3SBarry Smith   PetscInt     i;
26295d2a9ed1SStefano Zampini   PetscMPIInt  size;
2630a23d5eceSKris Buschelman 
2631a23d5eceSKris Buschelman   PetscFunctionBegin;
26329566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
26339566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->rmap));
26349566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(B->cmap));
26359566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2636899cda47SBarry Smith 
2637a23d5eceSKris Buschelman   if (d_nnz) {
2638ad540459SPierre Jolivet     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
2639a23d5eceSKris Buschelman   }
2640a23d5eceSKris Buschelman   if (o_nnz) {
2641ad540459SPierre Jolivet     for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
2642a23d5eceSKris Buschelman   }
2643a23d5eceSKris Buschelman 
2644a23d5eceSKris Buschelman   b      = (Mat_MPIBAIJ *)B->data;
2645a23d5eceSKris Buschelman   b->bs2 = bs * bs;
2646d0f46423SBarry Smith   b->mbs = B->rmap->n / bs;
2647d0f46423SBarry Smith   b->nbs = B->cmap->n / bs;
2648d0f46423SBarry Smith   b->Mbs = B->rmap->N / bs;
2649d0f46423SBarry Smith   b->Nbs = B->cmap->N / bs;
2650a23d5eceSKris Buschelman 
2651ad540459SPierre Jolivet   for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
2652d0f46423SBarry Smith   b->rstartbs = B->rmap->rstart / bs;
2653d0f46423SBarry Smith   b->rendbs   = B->rmap->rend / bs;
2654d0f46423SBarry Smith   b->cstartbs = B->cmap->rstart / bs;
2655d0f46423SBarry Smith   b->cendbs   = B->cmap->rend / bs;
2656a23d5eceSKris Buschelman 
2657cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE)
26589566063dSJacob Faibussowitsch   PetscCall(PetscTableDestroy(&b->colmap));
2659cb7b82ddSBarry Smith #else
26609566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->colmap));
2661cb7b82ddSBarry Smith #endif
26629566063dSJacob Faibussowitsch   PetscCall(PetscFree(b->garray));
26639566063dSJacob Faibussowitsch   PetscCall(VecDestroy(&b->lvec));
26649566063dSJacob Faibussowitsch   PetscCall(VecScatterDestroy(&b->Mvctx));
2665cb7b82ddSBarry Smith 
2666cb7b82ddSBarry Smith   /* Because the B will have been resized we simply destroy it and create a new one each time */
26679566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
26689566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&b->B));
26699566063dSJacob Faibussowitsch   PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
26709566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
26719566063dSJacob Faibussowitsch   PetscCall(MatSetType(b->B, MATSEQBAIJ));
2672cb7b82ddSBarry Smith 
2673526dfc15SBarry Smith   if (!B->preallocated) {
26749566063dSJacob Faibussowitsch     PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
26759566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
26769566063dSJacob Faibussowitsch     PetscCall(MatSetType(b->A, MATSEQBAIJ));
26779566063dSJacob Faibussowitsch     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
2678526dfc15SBarry Smith   }
2679a23d5eceSKris Buschelman 
26809566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
26819566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
2682526dfc15SBarry Smith   B->preallocated  = PETSC_TRUE;
2683cb7b82ddSBarry Smith   B->was_assembled = PETSC_FALSE;
2684cb7b82ddSBarry Smith   B->assembled     = PETSC_FALSE;
2685a23d5eceSKris Buschelman   PetscFunctionReturn(0);
2686a23d5eceSKris Buschelman }
2687a23d5eceSKris Buschelman 
26887087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
26897087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);
26905bf65638SKris Buschelman 
26919371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) {
269282094794SBarry Smith   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
269382094794SBarry Smith   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
269482094794SBarry Smith   PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
269582094794SBarry Smith   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
269682094794SBarry Smith 
269782094794SBarry Smith   PetscFunctionBegin;
26989566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(M + 1, &ii));
269982094794SBarry Smith   ii[0] = 0;
270082094794SBarry Smith   for (i = 0; i < M; i++) {
270108401ef6SPierre Jolivet     PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
270208401ef6SPierre Jolivet     PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
270382094794SBarry Smith     ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
27045ee9ba1cSJed Brown     /* remove one from count of matrix has diagonal */
27055ee9ba1cSJed Brown     for (j = id[i]; j < id[i + 1]; j++) {
27069371c9d4SSatish Balay       if (jd[j] == i) {
27079371c9d4SSatish Balay         ii[i + 1]--;
27089371c9d4SSatish Balay         break;
27099371c9d4SSatish Balay       }
27105ee9ba1cSJed Brown     }
271182094794SBarry Smith   }
27129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(ii[M], &jj));
271382094794SBarry Smith   cnt = 0;
271482094794SBarry Smith   for (i = 0; i < M; i++) {
271582094794SBarry Smith     for (j = io[i]; j < io[i + 1]; j++) {
271682094794SBarry Smith       if (garray[jo[j]] > rstart) break;
271782094794SBarry Smith       jj[cnt++] = garray[jo[j]];
271882094794SBarry Smith     }
271982094794SBarry Smith     for (k = id[i]; k < id[i + 1]; k++) {
2720ad540459SPierre Jolivet       if (jd[k] != i) jj[cnt++] = rstart + jd[k];
272182094794SBarry Smith     }
2722ad540459SPierre Jolivet     for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
272382094794SBarry Smith   }
27249566063dSJacob Faibussowitsch   PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
272582094794SBarry Smith   PetscFunctionReturn(0);
272682094794SBarry Smith }
272782094794SBarry Smith 
2728c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h>
272962471d69SBarry Smith 
2730cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);
2731b2573a8aSBarry Smith 
27329371c9d4SSatish Balay PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
273362471d69SBarry Smith   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
273485a69837SSatish Balay   Mat_MPIAIJ  *b;
27355f80ce2aSJacob Faibussowitsch   Mat          B;
273662471d69SBarry Smith 
273762471d69SBarry Smith   PetscFunctionBegin;
27385f80ce2aSJacob Faibussowitsch   PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");
273962471d69SBarry Smith 
27400f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
27410f6d62edSLisandro Dalcin     B = *newmat;
27420f6d62edSLisandro Dalcin   } else {
27439566063dSJacob Faibussowitsch     PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
27449566063dSJacob Faibussowitsch     PetscCall(MatSetType(B, MATMPIAIJ));
27459566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
27469566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
27479566063dSJacob Faibussowitsch     PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
27489566063dSJacob Faibussowitsch     PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
27490f6d62edSLisandro Dalcin   }
275062471d69SBarry Smith   b = (Mat_MPIAIJ *)B->data;
275162471d69SBarry Smith 
27520f6d62edSLisandro Dalcin   if (reuse == MAT_REUSE_MATRIX) {
27539566063dSJacob Faibussowitsch     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
27549566063dSJacob Faibussowitsch     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
27550f6d62edSLisandro Dalcin   } else {
27569566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&b->A));
27579566063dSJacob Faibussowitsch     PetscCall(MatDestroy(&b->B));
27589566063dSJacob Faibussowitsch     PetscCall(MatDisAssemble_MPIBAIJ(A));
27599566063dSJacob Faibussowitsch     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
27609566063dSJacob Faibussowitsch     PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
27619566063dSJacob Faibussowitsch     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
27629566063dSJacob Faibussowitsch     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
27630f6d62edSLisandro Dalcin   }
27649566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
27659566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
27660f6d62edSLisandro Dalcin 
2767511c6705SHong Zhang   if (reuse == MAT_INPLACE_MATRIX) {
27689566063dSJacob Faibussowitsch     PetscCall(MatHeaderReplace(A, &B));
276962471d69SBarry Smith   } else {
277062471d69SBarry Smith     *newmat = B;
277162471d69SBarry Smith   }
277262471d69SBarry Smith   PetscFunctionReturn(0);
277362471d69SBarry Smith }
277462471d69SBarry Smith 
/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use  (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
    `MatSetOption()`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
M*/
27920bad9183SKris Buschelman 
2793cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2794c0cdd4a1SDahai Guo 
27959371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) {
2796273d9f13SBarry Smith   Mat_MPIBAIJ *b;
279794ae4db5SBarry Smith   PetscBool    flg = PETSC_FALSE;
2798273d9f13SBarry Smith 
2799273d9f13SBarry Smith   PetscFunctionBegin;
2800*4dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&b));
280182502324SSatish Balay   B->data = (void *)b;
280282502324SSatish Balay 
28039566063dSJacob Faibussowitsch   PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2804273d9f13SBarry Smith   B->assembled = PETSC_FALSE;
2805273d9f13SBarry Smith 
2806273d9f13SBarry Smith   B->insertmode = NOT_SET_VALUES;
28079566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
28089566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2809273d9f13SBarry Smith 
2810273d9f13SBarry Smith   /* build local table of row and column ownerships */
28119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2812273d9f13SBarry Smith 
2813273d9f13SBarry Smith   /* build cache for off array entries formed */
28149566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
281526fbe8dcSKarl Rupp 
2816273d9f13SBarry Smith   b->donotstash  = PETSC_FALSE;
28170298fd71SBarry Smith   b->colmap      = NULL;
28180298fd71SBarry Smith   b->garray      = NULL;
2819273d9f13SBarry Smith   b->roworiented = PETSC_TRUE;
2820273d9f13SBarry Smith 
2821273d9f13SBarry Smith   /* stuff used in block assembly */
2822f4259b30SLisandro Dalcin   b->barray = NULL;
2823273d9f13SBarry Smith 
2824273d9f13SBarry Smith   /* stuff used for matrix vector multiply */
2825f4259b30SLisandro Dalcin   b->lvec  = NULL;
2826f4259b30SLisandro Dalcin   b->Mvctx = NULL;
2827273d9f13SBarry Smith 
2828273d9f13SBarry Smith   /* stuff for MatGetRow() */
2829f4259b30SLisandro Dalcin   b->rowindices   = NULL;
2830f4259b30SLisandro Dalcin   b->rowvalues    = NULL;
2831273d9f13SBarry Smith   b->getrowactive = PETSC_FALSE;
2832273d9f13SBarry Smith 
2833273d9f13SBarry Smith   /* hash table stuff */
2834f4259b30SLisandro Dalcin   b->ht           = NULL;
2835f4259b30SLisandro Dalcin   b->hd           = NULL;
2836273d9f13SBarry Smith   b->ht_size      = 0;
2837273d9f13SBarry Smith   b->ht_flag      = PETSC_FALSE;
2838273d9f13SBarry Smith   b->ht_fact      = 0;
2839273d9f13SBarry Smith   b->ht_total_ct  = 0;
2840273d9f13SBarry Smith   b->ht_insert_ct = 0;
2841273d9f13SBarry Smith 
28427dae84e0SHong Zhang   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
28437a868f3eSHong Zhang   b->ijonly = PETSC_FALSE;
28447a868f3eSHong Zhang 
28459566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
28469566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
28479566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
28487ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE)
28499566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
28507ea3e4caSstefano_zampini #endif
28519566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
28529566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
28539566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
28549566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
28559566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
28569566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
28579566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
28589566063dSJacob Faibussowitsch   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
285994ae4db5SBarry Smith 
2860d0609cedSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
28619566063dSJacob Faibussowitsch   PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
286294ae4db5SBarry Smith   if (flg) {
286394ae4db5SBarry Smith     PetscReal fact = 1.39;
28649566063dSJacob Faibussowitsch     PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
28659566063dSJacob Faibussowitsch     PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
286694ae4db5SBarry Smith     if (fact <= 1.0) fact = 1.39;
28679566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
28689566063dSJacob Faibussowitsch     PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
286994ae4db5SBarry Smith   }
2870d0609cedSBarry Smith   PetscOptionsEnd();
2871273d9f13SBarry Smith   PetscFunctionReturn(0);
2872273d9f13SBarry Smith }
2873273d9f13SBarry Smith 
2874209238afSKris Buschelman /*MC
2875002d173eSKris Buschelman    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2876209238afSKris Buschelman 
287711a5261eSBarry Smith    This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
287811a5261eSBarry Smith    and `MATMPIBAIJ` otherwise.
2879209238afSKris Buschelman 
   Options Database Key:
288111a5261eSBarry Smith . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`
2882209238afSKris Buschelman 
2883209238afSKris Buschelman   Level: beginner
2884209238afSKris Buschelman 
2885c2e3fba1SPatrick Sanan .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
2886209238afSKris Buschelman M*/
2887209238afSKris Buschelman 
2888273d9f13SBarry Smith /*@C
288911a5261eSBarry Smith    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
2890273d9f13SBarry Smith    (block compressed row).  For good matrix assembly performance
2891273d9f13SBarry Smith    the user should preallocate the matrix storage by setting the parameters
2892273d9f13SBarry Smith    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2893273d9f13SBarry Smith    performance can be increased by more than a factor of 50.
2894273d9f13SBarry Smith 
2895273d9f13SBarry Smith    Collective on Mat
2896273d9f13SBarry Smith 
2897273d9f13SBarry Smith    Input Parameters:
28981c4f3114SJed Brown +  B - the matrix
289911a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2901273d9f13SBarry Smith .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2902273d9f13SBarry Smith            submatrix  (same for all local rows)
2903273d9f13SBarry Smith .  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
29050298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
290695742e49SBarry Smith            set it even if it is zero.
2907273d9f13SBarry Smith .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2908273d9f13SBarry Smith            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
2910273d9f13SBarry Smith            off-diagonal portion of the local submatrix (possibly different for
29110298fd71SBarry Smith            each block row) or NULL.
2912273d9f13SBarry Smith 
291349a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
2914273d9f13SBarry Smith 
2915273d9f13SBarry Smith    Options Database Keys:
29168c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
291767b8a455SSatish Balay -   -mat_use_hash_table <fact> - set hash table factor
2918273d9f13SBarry Smith 
2919273d9f13SBarry Smith    Notes:
292011a5261eSBarry Smith    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2922273d9f13SBarry Smith 
2923273d9f13SBarry Smith    Storage Information:
2924273d9f13SBarry Smith    For a square global matrix we define each processor's diagonal portion
2925273d9f13SBarry Smith    to be its local rows and the corresponding columns (a square submatrix);
2926273d9f13SBarry Smith    each processor's off-diagonal portion encompasses the remainder of the
2927273d9f13SBarry Smith    local matrix (a rectangular submatrix).
2928273d9f13SBarry Smith 
2929273d9f13SBarry Smith    The user can specify preallocated storage for the diagonal part of
2930273d9f13SBarry Smith    the local submatrix with either d_nz or d_nnz (not both).  Set
29310298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2932273d9f13SBarry Smith    memory allocation.  Likewise, specify preallocated storage for the
2933273d9f13SBarry Smith    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2934273d9f13SBarry Smith 
2935273d9f13SBarry Smith    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2936273d9f13SBarry Smith    the figure below we depict these three local rows and all columns (0-11).
2937273d9f13SBarry Smith 
2938273d9f13SBarry Smith .vb
2939273d9f13SBarry Smith            0 1 2 3 4 5 6 7 8 9 10 11
2940a4b1a0f6SJed Brown           --------------------------
2941273d9f13SBarry Smith    row 3  |o o o d d d o o o o  o  o
2942273d9f13SBarry Smith    row 4  |o o o d d d o o o o  o  o
2943273d9f13SBarry Smith    row 5  |o o o d d d o o o o  o  o
2944a4b1a0f6SJed Brown           --------------------------
2945273d9f13SBarry Smith .ve
2946273d9f13SBarry Smith 
2947273d9f13SBarry Smith    Thus, any entries in the d locations are stored in the d (diagonal)
2948273d9f13SBarry Smith    submatrix, and any entries in the o locations are stored in the
2949273d9f13SBarry Smith    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
295011a5261eSBarry Smith    stored simply in the `MATSEQBAIJ` format for compressed row storage.
2951273d9f13SBarry Smith 
2952273d9f13SBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2953273d9f13SBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2954273d9f13SBarry Smith    In general, for PDE problems in which most nonzeros are near the diagonal,
2955273d9f13SBarry Smith    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2956273d9f13SBarry Smith    or you will get TERRIBLE performance; see the users' manual chapter on
2957273d9f13SBarry Smith    matrices.
2958273d9f13SBarry Smith 
295911a5261eSBarry Smith    You can call `MatGetInfo()` to get information on how effective the preallocation was;
2960aa95bbe8SBarry Smith    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2961aa95bbe8SBarry Smith    You can also run with the option -info and look for messages with the string
2962aa95bbe8SBarry Smith    malloc in them to see if additional memory allocation was needed.
2963aa95bbe8SBarry Smith 
2964273d9f13SBarry Smith    Level: intermediate
2965273d9f13SBarry Smith 
296611a5261eSBarry Smith .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
2967273d9f13SBarry Smith @*/
29689371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
2969273d9f13SBarry Smith   PetscFunctionBegin;
29706ba663aaSJed Brown   PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
29716ba663aaSJed Brown   PetscValidType(B, 1);
29726ba663aaSJed Brown   PetscValidLogicalCollectiveInt(B, bs, 2);
2973cac4c232SBarry Smith   PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2974273d9f13SBarry Smith   PetscFunctionReturn(0);
2975273d9f13SBarry Smith }
2976273d9f13SBarry Smith 
297779bdfe76SSatish Balay /*@C
297811a5261eSBarry Smith    MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
297979bdfe76SSatish Balay    (block compressed row).  For good matrix assembly performance
298079bdfe76SSatish Balay    the user should preallocate the matrix storage by setting the parameters
298179bdfe76SSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
298279bdfe76SSatish Balay    performance can be increased by more than a factor of 50.
298379bdfe76SSatish Balay 
2984d083f849SBarry Smith    Collective
2985db81eaa0SLois Curfman McInnes 
298679bdfe76SSatish Balay    Input Parameters:
2987db81eaa0SLois Curfman McInnes +  comm - MPI communicator
298811a5261eSBarry Smith .  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
298911a5261eSBarry Smith           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
299011a5261eSBarry Smith .  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
299192e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
299292e8d321SLois Curfman McInnes            y vector for the matrix-vector product y = Ax.
299311a5261eSBarry Smith .  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
299492e8d321SLois Curfman McInnes            This value should be the same as the local size used in creating the
299592e8d321SLois Curfman McInnes            x vector for the matrix-vector product y = Ax.
299611a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
299711a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
299847a75d0bSBarry Smith .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
299979bdfe76SSatish Balay            submatrix  (same for all local rows)
300047a75d0bSBarry Smith .  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
30020298fd71SBarry Smith            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
300395742e49SBarry Smith            and set it even if it is zero.
300447a75d0bSBarry Smith .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
300579bdfe76SSatish Balay            submatrix (same for all local rows).
300647a75d0bSBarry Smith -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
300792e8d321SLois Curfman McInnes            off-diagonal portion of the local submatrix (possibly different for
30080298fd71SBarry Smith            each block row) or NULL.
300979bdfe76SSatish Balay 
301079bdfe76SSatish Balay    Output Parameter:
301179bdfe76SSatish Balay .  A - the matrix
301279bdfe76SSatish Balay 
3013db81eaa0SLois Curfman McInnes    Options Database Keys:
30148c07d4e3SBarry Smith +   -mat_block_size - size of the blocks to use
301567b8a455SSatish Balay -   -mat_use_hash_table <fact> - set hash table factor
30163ffaccefSLois Curfman McInnes 
301711a5261eSBarry Smith    It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3018f6f02116SRichard Tran Mills    MatXXXXSetPreallocation() paradigm instead of this routine directly.
301911a5261eSBarry Smith    [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3020175b88e8SBarry Smith 
3021b259b22eSLois Curfman McInnes    Notes:
302249a6f317SBarry Smith    If the *_nnz parameter is given then the *_nz parameter is ignored
302349a6f317SBarry Smith 
   A nonzero block is any block that has 1 or more nonzeros in it
302547a75d0bSBarry Smith 
302679bdfe76SSatish Balay    The user MUST specify either the local or global matrix dimensions
302779bdfe76SSatish Balay    (possibly both).
302879bdfe76SSatish Balay 
302911a5261eSBarry Smith    If `PETSC_DECIDE` or  `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3031be79a94dSBarry Smith 
303279bdfe76SSatish Balay    Storage Information:
303379bdfe76SSatish Balay    For a square global matrix we define each processor's diagonal portion
303479bdfe76SSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
303579bdfe76SSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
303679bdfe76SSatish Balay    local matrix (a rectangular submatrix).
303779bdfe76SSatish Balay 
303879bdfe76SSatish Balay    The user can specify preallocated storage for the diagonal part of
303979bdfe76SSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
30400298fd71SBarry Smith    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
304179bdfe76SSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
304279bdfe76SSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
304379bdfe76SSatish Balay 
304479bdfe76SSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
304579bdfe76SSatish Balay    the figure below we depict these three local rows and all columns (0-11).
304679bdfe76SSatish Balay 
3047db81eaa0SLois Curfman McInnes .vb
3048db81eaa0SLois Curfman McInnes            0 1 2 3 4 5 6 7 8 9 10 11
3049a4b1a0f6SJed Brown           --------------------------
3050db81eaa0SLois Curfman McInnes    row 3  |o o o d d d o o o o  o  o
3051db81eaa0SLois Curfman McInnes    row 4  |o o o d d d o o o o  o  o
3052db81eaa0SLois Curfman McInnes    row 5  |o o o d d d o o o o  o  o
3053a4b1a0f6SJed Brown           --------------------------
3054db81eaa0SLois Curfman McInnes .ve
305579bdfe76SSatish Balay 
305679bdfe76SSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
305779bdfe76SSatish Balay    submatrix, and any entries in the o locations are stored in the
305879bdfe76SSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
305911a5261eSBarry Smith    stored simply in the `MATSEQBAIJ` format for compressed row storage.
306079bdfe76SSatish Balay 
3061d64ed03dSBarry Smith    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3062d64ed03dSBarry Smith    and o_nz should indicate the number of block nonzeros per row in the o matrix.
306379bdfe76SSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
306492e8d321SLois Curfman McInnes    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
306592e8d321SLois Curfman McInnes    or you will get TERRIBLE performance; see the users' manual chapter on
30666da5968aSLois Curfman McInnes    matrices.
306779bdfe76SSatish Balay 
3068027ccd11SLois Curfman McInnes    Level: intermediate
3069027ccd11SLois Curfman McInnes 
3070db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
307179bdfe76SSatish Balay @*/
30729371c9d4SSatish Balay PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) {
3073b24ad042SBarry Smith   PetscMPIInt size;
307479bdfe76SSatish Balay 
3075d64ed03dSBarry Smith   PetscFunctionBegin;
30769566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, A));
30779566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*A, m, n, M, N));
30789566063dSJacob Faibussowitsch   PetscCallMPI(MPI_Comm_size(comm, &size));
3079273d9f13SBarry Smith   if (size > 1) {
30809566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATMPIBAIJ));
30819566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
3082273d9f13SBarry Smith   } else {
30839566063dSJacob Faibussowitsch     PetscCall(MatSetType(*A, MATSEQBAIJ));
30849566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
30853914022bSBarry Smith   }
30863a40ed3dSBarry Smith   PetscFunctionReturn(0);
308779bdfe76SSatish Balay }
3088026e39d0SSatish Balay 
30899371c9d4SSatish Balay static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
30900ac07820SSatish Balay   Mat          mat;
30910ac07820SSatish Balay   Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
3092b24ad042SBarry Smith   PetscInt     len = 0;
30930ac07820SSatish Balay 
3094d64ed03dSBarry Smith   PetscFunctionBegin;
3095f4259b30SLisandro Dalcin   *newmat = NULL;
30969566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
30979566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
30989566063dSJacob Faibussowitsch   PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
30997fff6886SHong Zhang 
3100d5f3da31SBarry Smith   mat->factortype   = matin->factortype;
3101273d9f13SBarry Smith   mat->preallocated = PETSC_TRUE;
31020ac07820SSatish Balay   mat->assembled    = PETSC_TRUE;
31037fff6886SHong Zhang   mat->insertmode   = NOT_SET_VALUES;
31047fff6886SHong Zhang 
3105273d9f13SBarry Smith   a             = (Mat_MPIBAIJ *)mat->data;
3106d0f46423SBarry Smith   mat->rmap->bs = matin->rmap->bs;
31070ac07820SSatish Balay   a->bs2        = oldmat->bs2;
31080ac07820SSatish Balay   a->mbs        = oldmat->mbs;
31090ac07820SSatish Balay   a->nbs        = oldmat->nbs;
31100ac07820SSatish Balay   a->Mbs        = oldmat->Mbs;
31110ac07820SSatish Balay   a->Nbs        = oldmat->Nbs;
31120ac07820SSatish Balay 
31139566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
31149566063dSJacob Faibussowitsch   PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
3115899cda47SBarry Smith 
31160ac07820SSatish Balay   a->size         = oldmat->size;
31170ac07820SSatish Balay   a->rank         = oldmat->rank;
3118aef5e8e0SSatish Balay   a->donotstash   = oldmat->donotstash;
3119aef5e8e0SSatish Balay   a->roworiented  = oldmat->roworiented;
3120f4259b30SLisandro Dalcin   a->rowindices   = NULL;
3121f4259b30SLisandro Dalcin   a->rowvalues    = NULL;
31220ac07820SSatish Balay   a->getrowactive = PETSC_FALSE;
3123f4259b30SLisandro Dalcin   a->barray       = NULL;
3124899cda47SBarry Smith   a->rstartbs     = oldmat->rstartbs;
3125899cda47SBarry Smith   a->rendbs       = oldmat->rendbs;
3126899cda47SBarry Smith   a->cstartbs     = oldmat->cstartbs;
3127899cda47SBarry Smith   a->cendbs       = oldmat->cendbs;
31280ac07820SSatish Balay 
3129133cdb44SSatish Balay   /* hash table stuff */
3130f4259b30SLisandro Dalcin   a->ht           = NULL;
3131f4259b30SLisandro Dalcin   a->hd           = NULL;
3132133cdb44SSatish Balay   a->ht_size      = 0;
3133133cdb44SSatish Balay   a->ht_flag      = oldmat->ht_flag;
313425fdafccSSatish Balay   a->ht_fact      = oldmat->ht_fact;
3135133cdb44SSatish Balay   a->ht_total_ct  = 0;
3136133cdb44SSatish Balay   a->ht_insert_ct = 0;
3137133cdb44SSatish Balay 
31389566063dSJacob Faibussowitsch   PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
31390ac07820SSatish Balay   if (oldmat->colmap) {
3140aa482453SBarry Smith #if defined(PETSC_USE_CTABLE)
31419566063dSJacob Faibussowitsch     PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
314248e59246SSatish Balay #else
31439566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
31449566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
314548e59246SSatish Balay #endif
3146f4259b30SLisandro Dalcin   } else a->colmap = NULL;
31474beb1cfeSHong Zhang 
31480ac07820SSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
31499566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(len, &a->garray));
31509566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
3151f4259b30SLisandro Dalcin   } else a->garray = NULL;
31520ac07820SSatish Balay 
31539566063dSJacob Faibussowitsch   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
31549566063dSJacob Faibussowitsch   PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
31559566063dSJacob Faibussowitsch   PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
31567fff6886SHong Zhang 
31579566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
31589566063dSJacob Faibussowitsch   PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
31599566063dSJacob Faibussowitsch   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
31600ac07820SSatish Balay   *newmat = mat;
31613a40ed3dSBarry Smith   PetscFunctionReturn(0);
31620ac07820SSatish Balay }
316357b952d6SSatish Balay 
3164618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */
31659371c9d4SSatish Balay PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
3166b51a4376SLisandro Dalcin   PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
3167b51a4376SLisandro Dalcin   PetscInt    *rowidxs, *colidxs, rs, cs, ce;
3168b51a4376SLisandro Dalcin   PetscScalar *matvals;
3169b51a4376SLisandro Dalcin 
3170b51a4376SLisandro Dalcin   PetscFunctionBegin;
31719566063dSJacob Faibussowitsch   PetscCall(PetscViewerSetUp(viewer));
3172b51a4376SLisandro Dalcin 
3173b51a4376SLisandro Dalcin   /* read in matrix header */
31749566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
31755f80ce2aSJacob Faibussowitsch   PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
31769371c9d4SSatish Balay   M  = header[1];
31779371c9d4SSatish Balay   N  = header[2];
31789371c9d4SSatish Balay   nz = header[3];
31795f80ce2aSJacob Faibussowitsch   PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
31805f80ce2aSJacob Faibussowitsch   PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
31815f80ce2aSJacob Faibussowitsch   PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");
3182b51a4376SLisandro Dalcin 
3183b51a4376SLisandro Dalcin   /* set block sizes from the viewer's .info file */
31849566063dSJacob Faibussowitsch   PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
3185618cc2edSLisandro Dalcin   /* set local sizes if not set already */
3186618cc2edSLisandro Dalcin   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3187618cc2edSLisandro Dalcin   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3188b51a4376SLisandro Dalcin   /* set global sizes if not set already */
3189b51a4376SLisandro Dalcin   if (mat->rmap->N < 0) mat->rmap->N = M;
3190b51a4376SLisandro Dalcin   if (mat->cmap->N < 0) mat->cmap->N = N;
31919566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->rmap));
31929566063dSJacob Faibussowitsch   PetscCall(PetscLayoutSetUp(mat->cmap));
3193b51a4376SLisandro Dalcin 
3194b51a4376SLisandro Dalcin   /* check if the matrix sizes are correct */
31959566063dSJacob Faibussowitsch   PetscCall(MatGetSize(mat, &rows, &cols));
31965f80ce2aSJacob Faibussowitsch   PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
31979566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSize(mat, &bs));
31989566063dSJacob Faibussowitsch   PetscCall(MatGetLocalSize(mat, &m, &n));
31999566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
32009566063dSJacob Faibussowitsch   PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
32019371c9d4SSatish Balay   mbs = m / bs;
32029371c9d4SSatish Balay   nbs = n / bs;
3203b51a4376SLisandro Dalcin 
3204b51a4376SLisandro Dalcin   /* read in row lengths and build row indices */
32059566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m + 1, &rowidxs));
32069566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
32079371c9d4SSatish Balay   rowidxs[0] = 0;
32089371c9d4SSatish Balay   for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
32091c2dc1cbSBarry Smith   PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
32105f80ce2aSJacob Faibussowitsch   PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
3211b51a4376SLisandro Dalcin 
3212b51a4376SLisandro Dalcin   /* read in column indices and matrix values */
32139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
32149566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
32159566063dSJacob Faibussowitsch   PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
3216b51a4376SLisandro Dalcin 
3217b51a4376SLisandro Dalcin   {                /* preallocate matrix storage */
3218b51a4376SLisandro Dalcin     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3219b51a4376SLisandro Dalcin     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3220618cc2edSLisandro Dalcin     PetscBool  sbaij, done;
3221b51a4376SLisandro Dalcin     PetscInt  *d_nnz, *o_nnz;
3222b51a4376SLisandro Dalcin 
32239566063dSJacob Faibussowitsch     PetscCall(PetscBTCreate(nbs, &bt));
32249566063dSJacob Faibussowitsch     PetscCall(PetscHSetICreate(&ht));
32259566063dSJacob Faibussowitsch     PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
32269566063dSJacob Faibussowitsch     PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
3227b51a4376SLisandro Dalcin     for (i = 0; i < mbs; i++) {
32289566063dSJacob Faibussowitsch       PetscCall(PetscBTMemzero(nbs, bt));
32299566063dSJacob Faibussowitsch       PetscCall(PetscHSetIClear(ht));
3230618cc2edSLisandro Dalcin       for (k = 0; k < bs; k++) {
3231618cc2edSLisandro Dalcin         PetscInt row = bs * i + k;
3232618cc2edSLisandro Dalcin         for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
3233618cc2edSLisandro Dalcin           PetscInt col = colidxs[j];
3234618cc2edSLisandro Dalcin           if (!sbaij || col >= row) {
3235618cc2edSLisandro Dalcin             if (col >= cs && col < ce) {
3236618cc2edSLisandro Dalcin               if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
3237b51a4376SLisandro Dalcin             } else {
32389566063dSJacob Faibussowitsch               PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
3239b51a4376SLisandro Dalcin               if (done) o_nnz[i]++;
3240b51a4376SLisandro Dalcin             }
3241b51a4376SLisandro Dalcin           }
3242618cc2edSLisandro Dalcin         }
3243618cc2edSLisandro Dalcin       }
3244618cc2edSLisandro Dalcin     }
32459566063dSJacob Faibussowitsch     PetscCall(PetscBTDestroy(&bt));
32469566063dSJacob Faibussowitsch     PetscCall(PetscHSetIDestroy(&ht));
32479566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
32489566063dSJacob Faibussowitsch     PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
32499566063dSJacob Faibussowitsch     PetscCall(PetscFree2(d_nnz, o_nnz));
3250b51a4376SLisandro Dalcin   }
3251b51a4376SLisandro Dalcin 
3252b51a4376SLisandro Dalcin   /* store matrix values */
3253b51a4376SLisandro Dalcin   for (i = 0; i < m; i++) {
3254b51a4376SLisandro Dalcin     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
32559566063dSJacob Faibussowitsch     PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
3256b51a4376SLisandro Dalcin   }
3257b51a4376SLisandro Dalcin 
32589566063dSJacob Faibussowitsch   PetscCall(PetscFree(rowidxs));
32599566063dSJacob Faibussowitsch   PetscCall(PetscFree2(colidxs, matvals));
32609566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
32619566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
3262b51a4376SLisandro Dalcin   PetscFunctionReturn(0);
3263b51a4376SLisandro Dalcin }
3264b51a4376SLisandro Dalcin 
32659371c9d4SSatish Balay PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) {
32667f489da9SVaclav Hapla   PetscBool isbinary;
32674683f7a4SShri Abhyankar 
32684683f7a4SShri Abhyankar   PetscFunctionBegin;
32699566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
32705f80ce2aSJacob Faibussowitsch   PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
32719566063dSJacob Faibussowitsch   PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
32724683f7a4SShri Abhyankar   PetscFunctionReturn(0);
32734683f7a4SShri Abhyankar }
32744683f7a4SShri Abhyankar 
3275133cdb44SSatish Balay /*@
327611a5261eSBarry Smith    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table
3277133cdb44SSatish Balay 
3278133cdb44SSatish Balay    Input Parameters:
3279a2b725a8SWilliam Gropp +  mat  - the matrix
3280a2b725a8SWilliam Gropp -  fact - factor
3281133cdb44SSatish Balay 
328211a5261eSBarry Smith    Options Database Key:
328311a5261eSBarry Smith .  -mat_use_hash_table <fact> - provide the factor
3284fee21e36SBarry Smith 
32858c890885SBarry Smith    Level: advanced
32868c890885SBarry Smith 
328711a5261eSBarry Smith .seealso: `MATMPIBAIJ`, `MatSetOption()`
3288133cdb44SSatish Balay @*/
32899371c9d4SSatish Balay PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) {
32905bf65638SKris Buschelman   PetscFunctionBegin;
3291cac4c232SBarry Smith   PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
32925bf65638SKris Buschelman   PetscFunctionReturn(0);
32935bf65638SKris Buschelman }
32945bf65638SKris Buschelman 
32959371c9d4SSatish Balay PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) {
329625fdafccSSatish Balay   Mat_MPIBAIJ *baij;
3297133cdb44SSatish Balay 
3298133cdb44SSatish Balay   PetscFunctionBegin;
3299133cdb44SSatish Balay   baij          = (Mat_MPIBAIJ *)mat->data;
3300133cdb44SSatish Balay   baij->ht_fact = fact;
3301133cdb44SSatish Balay   PetscFunctionReturn(0);
3302133cdb44SSatish Balay }
3303f2a5309cSSatish Balay 
33049371c9d4SSatish Balay PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) {
3305f2a5309cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3306ab4d48faSStefano Zampini   PetscBool    flg;
33075fd66863SKarl Rupp 
3308f2a5309cSSatish Balay   PetscFunctionBegin;
33099566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
33105f80ce2aSJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
331121e72a00SBarry Smith   if (Ad) *Ad = a->A;
331221e72a00SBarry Smith   if (Ao) *Ao = a->B;
331321e72a00SBarry Smith   if (colmap) *colmap = a->garray;
3314f2a5309cSSatish Balay   PetscFunctionReturn(0);
3315f2a5309cSSatish Balay }
331685535b8eSBarry Smith 
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads).

    The C routine is written under the name matmpibaijsetvaluesblocked_; the macros below rename it
    to the all-capitals spelling, or to the spelling without the trailing underscore, depending on
    which of the two configuration macros is defined.
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif
332585535b8eSBarry Smith 
332685535b8eSBarry Smith /*@C
332711a5261eSBarry Smith   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`
332885535b8eSBarry Smith 
332985535b8eSBarry Smith   Collective on Mat
333085535b8eSBarry Smith 
333185535b8eSBarry Smith   Input Parameters:
333285535b8eSBarry Smith + mat - the matrix
333385535b8eSBarry Smith . min - number of input rows
333485535b8eSBarry Smith . im - input rows
333585535b8eSBarry Smith . nin - number of input columns
333685535b8eSBarry Smith . in - input columns
333785535b8eSBarry Smith . v - numerical values input
333811a5261eSBarry Smith - addvin - `INSERT_VALUES` or `ADD_VALUES`
333985535b8eSBarry Smith 
334011a5261eSBarry Smith   Developer Note:
334111a5261eSBarry Smith     This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
334285535b8eSBarry Smith 
334385535b8eSBarry Smith   Level: advanced
334485535b8eSBarry Smith 
3345db781477SPatrick Sanan .seealso: `MatSetValuesBlocked()`
334685535b8eSBarry Smith @*/
33479371c9d4SSatish Balay PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) {
334885535b8eSBarry Smith   /* convert input arguments to C version */
334985535b8eSBarry Smith   Mat        mat = *matin;
335085535b8eSBarry Smith   PetscInt   m = *min, n = *nin;
335185535b8eSBarry Smith   InsertMode addv = *addvin;
335285535b8eSBarry Smith 
335385535b8eSBarry Smith   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
335485535b8eSBarry Smith   const MatScalar *value;
335585535b8eSBarry Smith   MatScalar       *barray      = baij->barray;
3356ace3abfcSBarry Smith   PetscBool        roworiented = baij->roworiented;
335785535b8eSBarry Smith   PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
335885535b8eSBarry Smith   PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
3359d0f46423SBarry Smith   PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
336085535b8eSBarry Smith 
336185535b8eSBarry Smith   PetscFunctionBegin;
336285535b8eSBarry Smith   /* tasks normally handled by MatSetValuesBlocked() */
336326fbe8dcSKarl Rupp   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
33645f80ce2aSJacob Faibussowitsch   else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
33655f80ce2aSJacob Faibussowitsch   PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
336685535b8eSBarry Smith   if (mat->assembled) {
336785535b8eSBarry Smith     mat->was_assembled = PETSC_TRUE;
336885535b8eSBarry Smith     mat->assembled     = PETSC_FALSE;
336985535b8eSBarry Smith   }
33709566063dSJacob Faibussowitsch   PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));
337185535b8eSBarry Smith 
337285535b8eSBarry Smith   if (!barray) {
33739566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(bs2, &barray));
337485535b8eSBarry Smith     baij->barray = barray;
337585535b8eSBarry Smith   }
337685535b8eSBarry Smith 
337726fbe8dcSKarl Rupp   if (roworiented) stepval = (n - 1) * bs;
337826fbe8dcSKarl Rupp   else stepval = (m - 1) * bs;
337926fbe8dcSKarl Rupp 
338085535b8eSBarry Smith   for (i = 0; i < m; i++) {
338185535b8eSBarry Smith     if (im[i] < 0) continue;
33826bdcaf15SBarry Smith     PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
338385535b8eSBarry Smith     if (im[i] >= rstart && im[i] < rend) {
338485535b8eSBarry Smith       row = im[i] - rstart;
338585535b8eSBarry Smith       for (j = 0; j < n; j++) {
338685535b8eSBarry Smith         /* If NumCol = 1 then a copy is not required */
338785535b8eSBarry Smith         if ((roworiented) && (n == 1)) {
338885535b8eSBarry Smith           barray = (MatScalar *)v + i * bs2;
338985535b8eSBarry Smith         } else if ((!roworiented) && (m == 1)) {
339085535b8eSBarry Smith           barray = (MatScalar *)v + j * bs2;
339185535b8eSBarry Smith         } else { /* Here a copy is required */
339285535b8eSBarry Smith           if (roworiented) {
339385535b8eSBarry Smith             value = v + i * (stepval + bs) * bs + j * bs;
339485535b8eSBarry Smith           } else {
339585535b8eSBarry Smith             value = v + j * (stepval + bs) * bs + i * bs;
339685535b8eSBarry Smith           }
339785535b8eSBarry Smith           for (ii = 0; ii < bs; ii++, value += stepval) {
3398ad540459SPierre Jolivet             for (jj = 0; jj < bs; jj++) *barray++ = *value++;
339985535b8eSBarry Smith           }
340085535b8eSBarry Smith           barray -= bs2;
340185535b8eSBarry Smith         }
340285535b8eSBarry Smith 
340385535b8eSBarry Smith         if (in[j] >= cstart && in[j] < cend) {
340485535b8eSBarry Smith           col = in[j] - cstart;
34059566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
3406f7d195e4SLawrence Mitchell         } else if (in[j] < 0) {
3407f7d195e4SLawrence Mitchell           continue;
3408f7d195e4SLawrence Mitchell         } else {
3409f7d195e4SLawrence Mitchell           PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
341085535b8eSBarry Smith           if (mat->was_assembled) {
341148a46eb9SPierre Jolivet             if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
341285535b8eSBarry Smith 
341385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG)
341485535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
34159371c9d4SSatish Balay             {
34169371c9d4SSatish Balay               PetscInt data;
34179566063dSJacob Faibussowitsch               PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
341808401ef6SPierre Jolivet               PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
341985535b8eSBarry Smith             }
342085535b8eSBarry Smith #else
342108401ef6SPierre Jolivet             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
342285535b8eSBarry Smith #endif
342385535b8eSBarry Smith #endif
342485535b8eSBarry Smith #if defined(PETSC_USE_CTABLE)
34259566063dSJacob Faibussowitsch             PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
342685535b8eSBarry Smith             col = (col - 1) / bs;
342785535b8eSBarry Smith #else
342885535b8eSBarry Smith             col = (baij->colmap[in[j]] - 1) / bs;
342985535b8eSBarry Smith #endif
343085535b8eSBarry Smith             if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
34319566063dSJacob Faibussowitsch               PetscCall(MatDisAssemble_MPIBAIJ(mat));
343285535b8eSBarry Smith               col = in[j];
343385535b8eSBarry Smith             }
343426fbe8dcSKarl Rupp           } else col = in[j];
34359566063dSJacob Faibussowitsch           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
343685535b8eSBarry Smith         }
343785535b8eSBarry Smith       }
343885535b8eSBarry Smith     } else {
343985535b8eSBarry Smith       if (!baij->donotstash) {
344085535b8eSBarry Smith         if (roworiented) {
34419566063dSJacob Faibussowitsch           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
344285535b8eSBarry Smith         } else {
34439566063dSJacob Faibussowitsch           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
344485535b8eSBarry Smith         }
344585535b8eSBarry Smith       }
344685535b8eSBarry Smith     }
344785535b8eSBarry Smith   }
344885535b8eSBarry Smith 
344985535b8eSBarry Smith   /* task normally handled by MatSetValuesBlocked() */
34509566063dSJacob Faibussowitsch   PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
345185535b8eSBarry Smith   PetscFunctionReturn(0);
345285535b8eSBarry Smith }
3453dfb205c3SBarry Smith 
3454dfb205c3SBarry Smith /*@
345511a5261eSBarry Smith      MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
3456dfb205c3SBarry Smith          CSR format the local rows.
3457dfb205c3SBarry Smith 
3458d083f849SBarry Smith    Collective
3459dfb205c3SBarry Smith 
3460dfb205c3SBarry Smith    Input Parameters:
3461dfb205c3SBarry Smith +  comm - MPI communicator
3462dfb205c3SBarry Smith .  bs - the block size, only a block size of 1 is supported
346311a5261eSBarry Smith .  m - number of local rows (Cannot be `PETSC_DECIDE`)
3464dfb205c3SBarry Smith .  n - This value should be the same as the local size used in creating the
346511a5261eSBarry Smith        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
3466dfb205c3SBarry Smith        calculated if N is given) For square matrices n is almost always m.
346711a5261eSBarry Smith .  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
346811a5261eSBarry Smith .  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
3469483a2f95SBarry Smith .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3470dfb205c3SBarry Smith .   j - column indices
3471dfb205c3SBarry Smith -   a - matrix values
3472dfb205c3SBarry Smith 
3473dfb205c3SBarry Smith    Output Parameter:
3474dfb205c3SBarry Smith .   mat - the matrix
3475dfb205c3SBarry Smith 
3476dfb205c3SBarry Smith    Level: intermediate
3477dfb205c3SBarry Smith 
3478dfb205c3SBarry Smith    Notes:
3479dfb205c3SBarry Smith        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3480dfb205c3SBarry Smith      thus you CANNOT change the matrix entries by changing the values of a[] after you have
348111a5261eSBarry Smith      called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3482dfb205c3SBarry Smith 
34833adadaf3SJed Brown      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
34843adadaf3SJed Brown      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
34853adadaf3SJed Brown      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
34863adadaf3SJed Brown      with column-major ordering within blocks.
34873adadaf3SJed Brown 
3488dfb205c3SBarry Smith        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3489dfb205c3SBarry Smith 
3490db781477SPatrick Sanan .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3491db781477SPatrick Sanan           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3492dfb205c3SBarry Smith @*/
34939371c9d4SSatish Balay PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
3494dfb205c3SBarry Smith   PetscFunctionBegin;
34955f80ce2aSJacob Faibussowitsch   PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
34965f80ce2aSJacob Faibussowitsch   PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
34979566063dSJacob Faibussowitsch   PetscCall(MatCreate(comm, mat));
34989566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(*mat, m, n, M, N));
34999566063dSJacob Faibussowitsch   PetscCall(MatSetType(*mat, MATMPIBAIJ));
35009566063dSJacob Faibussowitsch   PetscCall(MatSetBlockSize(*mat, bs));
35019566063dSJacob Faibussowitsch   PetscCall(MatSetUp(*mat));
35029566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
35039566063dSJacob Faibussowitsch   PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
35049566063dSJacob Faibussowitsch   PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3505dfb205c3SBarry Smith   PetscFunctionReturn(0);
3506dfb205c3SBarry Smith }
3507e561ad89SHong Zhang 
35089371c9d4SSatish Balay PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
3509bd153df0SHong Zhang   PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
3510bd153df0SHong Zhang   PetscInt    *indx;
3511bd153df0SHong Zhang   PetscScalar *values;
3512e561ad89SHong Zhang 
3513e561ad89SHong Zhang   PetscFunctionBegin;
35149566063dSJacob Faibussowitsch   PetscCall(MatGetSize(inmat, &m, &N));
3515bd153df0SHong Zhang   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3516bd153df0SHong Zhang     Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
35172c6ba4edSHong Zhang     PetscInt    *dnz, *onz, mbs, Nbs, nbs;
3518bd153df0SHong Zhang     PetscInt    *bindx, rmax = a->rmax, j;
351977f764caSHong Zhang     PetscMPIInt  rank, size;
3520e561ad89SHong Zhang 
35219566063dSJacob Faibussowitsch     PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
35229371c9d4SSatish Balay     mbs = m / bs;
35239371c9d4SSatish Balay     Nbs = N / cbs;
352448a46eb9SPierre Jolivet     if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3525da91a574SPierre Jolivet     nbs = n / cbs;
3526e561ad89SHong Zhang 
35279566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(rmax, &bindx));
3528d0609cedSBarry Smith     MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
352977f764caSHong Zhang 
35309566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &rank));
35319566063dSJacob Faibussowitsch     PetscCallMPI(MPI_Comm_rank(comm, &size));
353277f764caSHong Zhang     if (rank == size - 1) {
353377f764caSHong Zhang       /* Check sum(nbs) = Nbs */
35345f80ce2aSJacob Faibussowitsch       PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
353577f764caSHong Zhang     }
353677f764caSHong Zhang 
3537d0609cedSBarry Smith     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3538bd153df0SHong Zhang     for (i = 0; i < mbs; i++) {
35399566063dSJacob Faibussowitsch       PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3540647a6520SHong Zhang       nnz = nnz / bs;
3541647a6520SHong Zhang       for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
35429566063dSJacob Faibussowitsch       PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
35439566063dSJacob Faibussowitsch       PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3544e561ad89SHong Zhang     }
35459566063dSJacob Faibussowitsch     PetscCall(PetscFree(bindx));
3546e561ad89SHong Zhang 
35479566063dSJacob Faibussowitsch     PetscCall(MatCreate(comm, outmat));
35489566063dSJacob Faibussowitsch     PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
35499566063dSJacob Faibussowitsch     PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
35509566063dSJacob Faibussowitsch     PetscCall(MatSetType(*outmat, MATBAIJ));
35519566063dSJacob Faibussowitsch     PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
35529566063dSJacob Faibussowitsch     PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3553d0609cedSBarry Smith     MatPreallocateEnd(dnz, onz);
35549566063dSJacob Faibussowitsch     PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3555e561ad89SHong Zhang   }
3556e561ad89SHong Zhang 
3557bd153df0SHong Zhang   /* numeric phase */
35589566063dSJacob Faibussowitsch   PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
35599566063dSJacob Faibussowitsch   PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3560e561ad89SHong Zhang 
3561e561ad89SHong Zhang   for (i = 0; i < m; i++) {
35629566063dSJacob Faibussowitsch     PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3563e561ad89SHong Zhang     Ii = i + rstart;
35649566063dSJacob Faibussowitsch     PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
35659566063dSJacob Faibussowitsch     PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
3566e561ad89SHong Zhang   }
35679566063dSJacob Faibussowitsch   PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
35689566063dSJacob Faibussowitsch   PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
3569e561ad89SHong Zhang   PetscFunctionReturn(0);
3570e561ad89SHong Zhang }
3571