xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 8fb5bd83c3955fefcf33a54e3bb66920a9fa884b)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
24   PetscValidLogicalCollectiveInt(B,nm,2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
27     PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
28     PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B,fill,5);
31   PetscCall(MatGetBlockSize(B,&bs));
32   PetscCall(MatGetSize(B,&M,&N));
33   PetscCall(MatGetLocalSize(B,&m,&n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
35   PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator,bs));
37   PetscCall(MatSetSizes(preallocator,m,n,M,N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt          ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
47       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
48       if (symm && symm[i]) {
49         PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
50       }
51       PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
52     }
53   }
54   PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
55   PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
56   PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
57   PetscCall(MatDestroy(&preallocator));
58   PetscFunctionReturn(0);
59 }
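/*
   Editor's note (illustrative sketch, not part of this source): the helper above uses the
   MATPREALLOCATOR pattern: record the nonzero pattern once, let the preallocator size the
   real matrix, then insert the actual values.  Names such as comm, m, n, M, N and B below
   are placeholders.

     Mat pre;
     PetscCall(MatCreate(comm,&pre));
     PetscCall(MatSetType(pre,MATPREALLOCATOR));
     PetscCall(MatSetSizes(pre,m,n,M,N));
     PetscCall(MatSetUp(pre));
     // ... call MatSetValues(pre,...) with the same pattern the final matrix will have ...
     PetscCall(MatAssemblyBegin(pre,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(pre,MAT_FINAL_ASSEMBLY));
     PetscCall(MatPreallocatorPreallocate(pre,PETSC_TRUE,B));
     PetscCall(MatDestroy(&pre));
*/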
60 
61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
62 {
63   Mat            B;
64   PetscInt       r;
65 
66   PetscFunctionBegin;
67   if (reuse != MAT_REUSE_MATRIX) {
68     PetscBool symm = PETSC_TRUE,isdense;
69     PetscInt  bs;
70 
71     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
72     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
73     PetscCall(MatSetType(B,newtype));
74     PetscCall(MatGetBlockSize(A,&bs));
75     PetscCall(MatSetBlockSize(B,bs));
76     PetscCall(PetscLayoutSetUp(B->rmap));
77     PetscCall(PetscLayoutSetUp(B->cmap));
78     PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
79     if (!isdense) {
80       PetscCall(MatGetRowUpperTriangular(A));
81       PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
82       PetscCall(MatRestoreRowUpperTriangular(A));
83     } else {
84       PetscCall(MatSetUp(B));
85     }
86   } else {
87     B    = *newmat;
88     PetscCall(MatZeroEntries(B));
89   }
90 
91   PetscCall(MatGetRowUpperTriangular(A));
92   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
93     PetscInt          ncols;
94     const PetscInt    *row;
95     const PetscScalar *vals;
96 
97     PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
98     PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
99 #if defined(PETSC_USE_COMPLEX)
100     if (A->hermitian) {
101       PetscInt i;
102       for (i = 0; i < ncols; i++) {
103         PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
104       }
105     } else {
106       PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
107     }
108 #else
109     PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
110 #endif
111     PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
112   }
113   PetscCall(MatRestoreRowUpperTriangular(A));
114   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
116 
117   if (reuse == MAT_INPLACE_MATRIX) {
118     PetscCall(MatHeaderReplace(A,&B));
119   } else {
120     *newmat = B;
121   }
122   PetscFunctionReturn(0);
123 }
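/*
   Editor's note (hedged usage sketch): the basic conversion above is normally reached through
   the generic dispatch rather than called directly, e.g. on the caller side

     Mat Aaij;
     PetscCall(MatConvert(Asbaij,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aaij));

   where Asbaij is an assembled MATMPISBAIJ matrix (the name is a placeholder).
*/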
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
144 
145 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
146   { \
147     brow = row/bs;  \
148     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149     rmax = aimax[brow]; nrow = ailen[brow]; \
150     bcol = col/bs; \
151     ridx = row % bs; cidx = col % bs; \
152     low  = 0; high = nrow; \
153     while (high-low > 3) { \
154       t = (low+high)/2; \
155       if (rp[t] > bcol) high = t; \
156       else              low  = t; \
157     } \
158     for (_i=low; _i<high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2*_i + bs*cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value;  \
163         else                    *bap  = value;  \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
170     N = nrow++ - 1;  \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
173     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
174     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
175     rp[_i]                      = bcol;  \
176     ap[bs2*_i + bs*cidx + ridx] = value;  \
177     A->nonzerostate++;\
178 a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
183   { \
184     brow = row/bs;  \
185     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
186     rmax = bimax[brow]; nrow = bilen[brow]; \
187     bcol = col/bs; \
188     ridx = row % bs; cidx = col % bs; \
189     low  = 0; high = nrow; \
190     while (high-low > 3) { \
191       t = (low+high)/2; \
192       if (rp[t] > bcol) high = t; \
193       else              low  = t; \
194     } \
195     for (_i=low; _i<high; _i++) { \
196       if (rp[_i] > bcol) break; \
197       if (rp[_i] == bcol) { \
198         bap = ap + bs2*_i + bs*cidx + ridx; \
199         if (addv == ADD_VALUES) *bap += value;  \
200         else                    *bap  = value;  \
201         goto b_noinsert; \
202       } \
203     } \
204     if (b->nonew == 1) goto b_noinsert; \
205     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
206     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
207     N = nrow++ - 1;  \
208     /* shift up all the later entries in this row */ \
209     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
210     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
211     PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
212     rp[_i]                      = bcol;  \
213     ap[bs2*_i + bs*cidx + ridx] = value;  \
214     B->nonzerostate++;\
215 b_noinsert:; \
216     bilen[brow] = nrow; \
217   }
218 
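/*
   Editor's note: the two macros above locate a block column inside one block row with a short
   bisection followed by a linear scan.  A standalone sketch of that search, with made-up names
   (FindBlockCol, rp, nrow, bcol), for illustration only:

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {          // bisect until the window is small
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {      // then scan the remaining window
         if (rp[i] == bcol) return i;  // existing block found
         if (rp[i] > bcol) break;
       }
       return -(i+1);                  // not found; encode the insertion point
     }
*/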
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j input by the user is ignored or generates an error
221 */
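/*
   Editor's note (caller-side sketch, illustrative): only entries whose block row is <= the
   block column should be passed in; lower-triangular input either raises the error below or,
   after

     PetscCall(MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));

   is silently dropped, so a typical insertion loop only issues

     PetscCall(MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES));

   for global indices with i <= j (i, j and v are placeholders).
*/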
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some Variables required in the macro */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) continue;
285         else {  /* off-diag entry (B) */
286           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
287           if (mat->was_assembled) {
288             if (!baij->colmap) {
289               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
290             }
291 #if defined(PETSC_USE_CTABLE)
292             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
293             col  = col - 1;
294 #else
295             col = baij->colmap[in[j]/bs] - 1;
296 #endif
297             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
298               PetscCall(MatDisAssemble_MPISBAIJ(mat));
299               col  =  in[j];
300               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
301               B    = baij->B;
302               b    = (Mat_SeqBAIJ*)(B)->data;
303               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
304               ba   = b->a;
305             } else col += in[j]%bs;
306           } else col = in[j];
307           if (roworiented) value = v[i*n+j];
308           else             value = v[i+j*m];
309           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
310           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
311         }
312       }
313     } else {  /* off processor entry */
314       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
315       if (!baij->donotstash) {
316         mat->assembled = PETSC_FALSE;
317         n_loc          = 0;
318         for (j=0; j<n; j++) {
319           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
320           in_loc[n_loc] = in[j];
321           if (roworiented) {
322             v_loc[n_loc] = v[i*n+j];
323           } else {
324             v_loc[n_loc] = v[j*m+i];
325           }
326           n_loc++;
327         }
328         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
329       }
330     }
331   }
332   PetscFunctionReturn(0);
333 }
334 
335 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
336 {
337   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
338   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
339   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
340   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
341   PetscBool         roworiented=a->roworiented;
342   const PetscScalar *value     = v;
343   MatScalar         *ap,*aa = a->a,*bap;
344 
345   PetscFunctionBegin;
346   if (col < row) {
347     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
348     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
349   }
350   rp   = aj + ai[row];
351   ap   = aa + bs2*ai[row];
352   rmax = imax[row];
353   nrow = ailen[row];
354   value = v;
355   low   = 0;
356   high  = nrow;
357 
358   while (high-low > 7) {
359     t = (low+high)/2;
360     if (rp[t] > col) high = t;
361     else             low  = t;
362   }
363   for (i=low; i<high; i++) {
364     if (rp[i] > col) break;
365     if (rp[i] == col) {
366       bap = ap +  bs2*i;
367       if (roworiented) {
368         if (is == ADD_VALUES) {
369           for (ii=0; ii<bs; ii++) {
370             for (jj=ii; jj<bs2; jj+=bs) {
371               bap[jj] += *value++;
372             }
373           }
374         } else {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] = *value++;
378             }
379           }
380         }
381       } else {
382         if (is == ADD_VALUES) {
383           for (ii=0; ii<bs; ii++) {
384             for (jj=0; jj<bs; jj++) {
385               *bap++ += *value++;
386             }
387           }
388         } else {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++  = *value++;
392             }
393           }
394         }
395       }
396       goto noinsert2;
397     }
398   }
399   if (nonew == 1) goto noinsert2;
400   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
401   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
402   N = nrow++ - 1; high++;
403   /* shift up all the later entries in this row */
404   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
405   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427    This routine is exactly duplicated in mpibaij.c
428 */
429 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
430 {
431   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
432   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
433   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
434   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
435   PetscBool         roworiented=a->roworiented;
436   const PetscScalar *value     = v;
437   MatScalar         *ap,*aa = a->a,*bap;
438 
439   PetscFunctionBegin;
440   rp   = aj + ai[row];
441   ap   = aa + bs2*ai[row];
442   rmax = imax[row];
443   nrow = ailen[row];
444   low  = 0;
445   high = nrow;
446   value = v;
447   while (high-low > 7) {
448     t = (low+high)/2;
449     if (rp[t] > col) high = t;
450     else             low  = t;
451   }
452   for (i=low; i<high; i++) {
453     if (rp[i] > col) break;
454     if (rp[i] == col) {
455       bap = ap +  bs2*i;
456       if (roworiented) {
457         if (is == ADD_VALUES) {
458           for (ii=0; ii<bs; ii++) {
459             for (jj=ii; jj<bs2; jj+=bs) {
460               bap[jj] += *value++;
461             }
462           }
463         } else {
464           for (ii=0; ii<bs; ii++) {
465             for (jj=ii; jj<bs2; jj+=bs) {
466               bap[jj] = *value++;
467             }
468           }
469         }
470       } else {
471         if (is == ADD_VALUES) {
472           for (ii=0; ii<bs; ii++,value+=bs) {
473             for (jj=0; jj<bs; jj++) {
474               bap[jj] += value[jj];
475             }
476             bap += bs;
477           }
478         } else {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj]  = value[jj];
482             }
483             bap += bs;
484           }
485         }
486       }
487       goto noinsert2;
488     }
489   }
490   if (nonew == 1) goto noinsert2;
491   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
492   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
493   N = nrow++ - 1; high++;
494   /* shift up all the later entries in this row */
495   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
496   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
497   rp[i] = col;
498   bap   = ap +  bs2*i;
499   if (roworiented) {
500     for (ii=0; ii<bs; ii++) {
501       for (jj=ii; jj<bs2; jj+=bs) {
502         bap[jj] = *value++;
503       }
504     }
505   } else {
506     for (ii=0; ii<bs; ii++) {
507       for (jj=0; jj<bs; jj++) {
508         *bap++ = *value++;
509       }
510     }
511   }
512   noinsert2:;
513   ailen[row] = nrow;
514   PetscFunctionReturn(0);
515 }
516 
517 /*
518     This routine could be optimized by removing the need for the block copy below and passing stride information
519   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
520 */
521 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
522 {
523   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
524   const MatScalar *value;
525   MatScalar       *barray     =baij->barray;
526   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
527   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
528   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
529   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
530 
531   PetscFunctionBegin;
532   if (!barray) {
533     PetscCall(PetscMalloc1(bs2,&barray));
534     baij->barray = barray;
535   }
536 
537   if (roworiented) {
538     stepval = (n-1)*bs;
539   } else {
540     stepval = (m-1)*bs;
541   }
542   for (i=0; i<m; i++) {
543     if (im[i] < 0) continue;
544     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
545     if (im[i] >= rstart && im[i] < rend) {
546       row = im[i] - rstart;
547       for (j=0; j<n; j++) {
548         if (im[i] > in[j]) {
549           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
550           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
551         }
552         /* If NumCol = 1 then a copy is not required */
553         if ((roworiented) && (n == 1)) {
554           barray = (MatScalar*) v + i*bs2;
555         } else if ((!roworiented) && (m == 1)) {
556           barray = (MatScalar*) v + j*bs2;
557         } else { /* Here a copy is required */
558           if (roworiented) {
559             value = v + i*(stepval+bs)*bs + j*bs;
560           } else {
561             value = v + j*(stepval+bs)*bs + i*bs;
562           }
563           for (ii=0; ii<bs; ii++,value+=stepval) {
564             for (jj=0; jj<bs; jj++) {
565               *barray++ = *value++;
566             }
567           }
568           barray -=bs2;
569         }
570 
571         if (in[j] >= cstart && in[j] < cend) {
572           col  = in[j] - cstart;
573           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
574         } else if (in[j] < 0) continue;
575         else {
576           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
577           if (mat->was_assembled) {
578             if (!baij->colmap) {
579               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
580             }
581 
582 #if defined(PETSC_USE_DEBUG)
583 #if defined(PETSC_USE_CTABLE)
584             { PetscInt data;
585               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
586               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
587             }
588 #else
589             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590 #endif
591 #endif
592 #if defined(PETSC_USE_CTABLE)
593             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
594             col  = (col - 1)/bs;
595 #else
596             col = (baij->colmap[in[j]] - 1)/bs;
597 #endif
598             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
599               PetscCall(MatDisAssemble_MPISBAIJ(mat));
600               col  = in[j];
601             }
602           } else col = in[j];
603           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
604         }
605       }
606     } else {
607       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
608       if (!baij->donotstash) {
609         if (roworiented) {
610           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
611         } else {
612           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
613         }
614       }
615     }
616   }
617   PetscFunctionReturn(0);
618 }
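/*
   Editor's note (hedged usage sketch): the routine above is reached through the public
   interface with block indices, e.g.

     PetscInt    ib,jb;        // block row/column indices (placeholders)
     PetscScalar vblock[4];    // one bs x bs block, here bs = 2, ordering per MAT_ROW_ORIENTED
     PetscCall(MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,ADD_VALUES));

   with only blocks satisfying ib <= jb supplied, as in the scalar MatSetValues() case.
*/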
619 
620 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
621 {
622   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
623   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
624   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
625 
626   PetscFunctionBegin;
627   for (i=0; i<m; i++) {
628     if (idxm[i] < 0) continue; /* negative row */
629     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
630     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
631       row = idxm[i] - bsrstart;
632       for (j=0; j<n; j++) {
633         if (idxn[j] < 0) continue; /* negative column */
634         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
635         if (idxn[j] >= bscstart && idxn[j] < bscend) {
636           col  = idxn[j] - bscstart;
637           PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
638         } else {
639           if (!baij->colmap) {
640             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
641           }
642 #if defined(PETSC_USE_CTABLE)
643           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
644           data--;
645 #else
646           data = baij->colmap[idxn[j]/bs]-1;
647 #endif
648           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
649           else {
650             col  = data + idxn[j]%bs;
651             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
652           }
653         }
654       }
655     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
656   }
657   PetscFunctionReturn(0);
658 }
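/*
   Editor's note (illustrative): MatGetValues() on this type only serves locally owned rows,
   so a caller-side query looks like

     PetscScalar val;
     PetscCall(MatGetValues(mat,1,&i,1,&j,&val));   // i must lie in this rank's [rstart,rend)

   where i and j are placeholders for global indices.
*/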
659 
660 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
661 {
662   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
663   PetscReal      sum[2],*lnorm2;
664 
665   PetscFunctionBegin;
666   if (baij->size == 1) {
667     PetscCall(MatNorm(baij->A,type,norm));
668   } else {
669     if (type == NORM_FROBENIUS) {
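      /* ||mat||_F^2 = ||A||_F^2 + 2*||B||_F^2: B holds only one triangle of the
         off-diagonal coupling, so its contribution is counted twice below */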
670       PetscCall(PetscMalloc1(2,&lnorm2));
671       PetscCall(MatNorm(baij->A,type,lnorm2));
672       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
673       PetscCall(MatNorm(baij->B,type,lnorm2));
674       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
675       PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
676       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
677       PetscCall(PetscFree(lnorm2));
678     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
679       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
680       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
681       PetscReal    *rsum,*rsum2,vabs;
682       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
683       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
684       MatScalar    *v;
685 
686       PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
687       PetscCall(PetscArrayzero(rsum,mat->cmap->N));
688       /* Amat */
689       v = amat->a; jj = amat->j;
690       for (brow=0; brow<mbs; brow++) {
691         grow = bs*(rstart + brow);
692         nz   = amat->i[brow+1] - amat->i[brow];
693         for (bcol=0; bcol<nz; bcol++) {
694           gcol = bs*(rstart + *jj); jj++;
695           for (col=0; col<bs; col++) {
696             for (row=0; row<bs; row++) {
697               vabs            = PetscAbsScalar(*v); v++;
698               rsum[gcol+col] += vabs;
699               /* non-diagonal block */
700               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
701             }
702           }
703         }
704         PetscCall(PetscLogFlops(nz*bs*bs));
705       }
706       /* Bmat */
707       v = bmat->a; jj = bmat->j;
708       for (brow=0; brow<mbs; brow++) {
709         grow = bs*(rstart + brow);
710         nz = bmat->i[brow+1] - bmat->i[brow];
711         for (bcol=0; bcol<nz; bcol++) {
712           gcol = bs*garray[*jj]; jj++;
713           for (col=0; col<bs; col++) {
714             for (row=0; row<bs; row++) {
715               vabs            = PetscAbsScalar(*v); v++;
716               rsum[gcol+col] += vabs;
717               rsum[grow+row] += vabs;
718             }
719           }
720         }
721         PetscCall(PetscLogFlops(nz*bs*bs));
722       }
723       PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
724       *norm = 0.0;
725       for (col=0; col<mat->cmap->N; col++) {
726         if (rsum2[col] > *norm) *norm = rsum2[col];
727       }
728       PetscCall(PetscFree2(rsum,rsum2));
729     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
730   }
731   PetscFunctionReturn(0);
732 }
733 
734 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
735 {
736   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
737   PetscInt       nstash,reallocs;
738 
739   PetscFunctionBegin;
740   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
741 
742   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
743   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
744   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
745   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
746   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
747   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
752 {
753   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
754   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
755   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
756   PetscInt       *row,*col;
757   PetscBool      other_disassembled;
758   PetscMPIInt    n;
759   PetscBool      r1,r2,r3;
760   MatScalar      *val;
761 
762   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
763   PetscFunctionBegin;
764   if (!baij->donotstash &&  !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
767       if (!flg) break;
768 
769       for (i=0; i<n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j=i,rstart=row[j]; j<n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j-i;
775         else       ncols = n-i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
778         i    = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782     /* Now process the block-stash. Since the values are stashed column-oriented,
783        temporarily clear the roworiented flags (i.e. switch to column-oriented insertion)
784        and restore the original flags after MatSetValuesBlocked_MPISBAIJ() */
785     r1 = baij->roworiented;
786     r2 = a->roworiented;
787     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
788 
789     baij->roworiented = PETSC_FALSE;
790     a->roworiented    = PETSC_FALSE;
791 
792     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
793     while (1) {
794       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
795       if (!flg) break;
796 
797       for (i=0; i<n;) {
798         /* Now identify the consecutive vals belonging to the same row */
799         for (j=i,rstart=row[j]; j<n; j++) {
800           if (row[j] != rstart) break;
801         }
802         if (j < n) ncols = j-i;
803         else       ncols = n-i;
804         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
805         i    = j;
806       }
807     }
808     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
809 
810     baij->roworiented = r1;
811     a->roworiented    = r2;
812 
813     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
814   }
815 
816   PetscCall(MatAssemblyBegin(baij->A,mode));
817   PetscCall(MatAssemblyEnd(baij->A,mode));
818 
819   /* determine if any processor has disassembled; if so, we must
820      also disassemble ourselves, in order that we may reassemble. */
821   /*
822      if the nonzero structure of submatrix B cannot change then we know that
823      no processor disassembled, thus we can skip this step
824   */
825   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
826     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
827     if (mat->was_assembled && !other_disassembled) {
828       PetscCall(MatDisAssemble_MPISBAIJ(mat));
829     }
830   }
831 
832   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
833     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
834   }
835   PetscCall(MatAssemblyBegin(baij->B,mode));
836   PetscCall(MatAssemblyEnd(baij->B,mode));
837 
838   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
839 
840   baij->rowvalues = NULL;
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
844     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
845     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
846   }
847   PetscFunctionReturn(0);
848 }
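/*
   Editor's note (caller-side sketch): the stash processing above is driven by the usual
   assembly sequence

     PetscCall(MatSetValues(mat,...));                       // possibly off-process, stashed
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));    // starts the stash scatters
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));      // drains the stashes here

   (the MatSetValues() arguments are elided).
*/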
849 
850 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
851 #include <petscdraw.h>
852 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
853 {
854   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
855   PetscInt          bs   = mat->rmap->bs;
856   PetscMPIInt       rank = baij->rank;
857   PetscBool         iascii,isdraw;
858   PetscViewer       sviewer;
859   PetscViewerFormat format;
860 
861   PetscFunctionBegin;
862   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
863   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
864   if (iascii) {
865     PetscCall(PetscViewerGetFormat(viewer,&format));
866     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
867       MatInfo info;
868       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
869       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
870       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
871       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
872       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
873       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
874       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
875       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
876       PetscCall(PetscViewerFlush(viewer));
877       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
878       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
879       PetscCall(VecScatterView(baij->Mvctx,viewer));
880       PetscFunctionReturn(0);
881     } else if (format == PETSC_VIEWER_ASCII_INFO) {
882       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
885       PetscFunctionReturn(0);
886     }
887   }
888 
889   if (isdraw) {
890     PetscDraw draw;
891     PetscBool isnull;
892     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
893     PetscCall(PetscDrawIsNull(draw,&isnull));
894     if (isnull) PetscFunctionReturn(0);
895   }
896 
897   {
898     /* assemble the entire matrix onto first processor. */
899     Mat          A;
900     Mat_SeqSBAIJ *Aloc;
901     Mat_SeqBAIJ  *Bloc;
902     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
903     MatScalar    *a;
904     const char   *matname;
905 
906     /* Should this be the same type as mat? */
907     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
908     if (rank == 0) {
909       PetscCall(MatSetSizes(A,M,N,M,N));
910     } else {
911       PetscCall(MatSetSizes(A,0,0,M,N));
912     }
913     PetscCall(MatSetType(A,MATMPISBAIJ));
914     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
915     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
916     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
917 
918     /* copy over the A part */
919     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
920     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
921     PetscCall(PetscMalloc1(bs,&rvals));
922 
923     for (i=0; i<mbs; i++) {
924       rvals[0] = bs*(baij->rstartbs + i);
925       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
926       for (j=ai[i]; j<ai[i+1]; j++) {
927         col = (baij->cstartbs+aj[j])*bs;
928         for (k=0; k<bs; k++) {
929           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
930           col++;
931           a += bs;
932         }
933       }
934     }
935     /* copy over the B part */
936     Bloc = (Mat_SeqBAIJ*)baij->B->data;
937     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
938     for (i=0; i<mbs; i++) {
939 
940       rvals[0] = bs*(baij->rstartbs + i);
941       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
942       for (j=ai[i]; j<ai[i+1]; j++) {
943         col = baij->garray[aj[j]]*bs;
944         for (k=0; k<bs; k++) {
945           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
946           col++;
947           a += bs;
948         }
949       }
950     }
951     PetscCall(PetscFree(rvals));
952     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
953     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
954     /*
955        Everyone has to call to draw the matrix since the graphics waits are
956        synchronized across all processors that share the PetscDraw object
957     */
958     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
959     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
960     if (rank == 0) {
961       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
962       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
963     }
964     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
965     PetscCall(PetscViewerFlush(viewer));
966     PetscCall(MatDestroy(&A));
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 /* Used for both MPIBAIJ and MPISBAIJ matrices */
972 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
973 
974 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
975 {
976   PetscBool      iascii,isdraw,issocket,isbinary;
977 
978   PetscFunctionBegin;
979   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
980   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
981   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
982   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
983   if (iascii || isdraw || issocket) {
984     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
985   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
986   PetscFunctionReturn(0);
987 }
988 
989 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
990 {
991   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
992 
993   PetscFunctionBegin;
994 #if defined(PETSC_USE_LOG)
995   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
996 #endif
997   PetscCall(MatStashDestroy_Private(&mat->stash));
998   PetscCall(MatStashDestroy_Private(&mat->bstash));
999   PetscCall(MatDestroy(&baij->A));
1000   PetscCall(MatDestroy(&baij->B));
1001 #if defined(PETSC_USE_CTABLE)
1002   PetscCall(PetscTableDestroy(&baij->colmap));
1003 #else
1004   PetscCall(PetscFree(baij->colmap));
1005 #endif
1006   PetscCall(PetscFree(baij->garray));
1007   PetscCall(VecDestroy(&baij->lvec));
1008   PetscCall(VecScatterDestroy(&baij->Mvctx));
1009   PetscCall(VecDestroy(&baij->slvec0));
1010   PetscCall(VecDestroy(&baij->slvec0b));
1011   PetscCall(VecDestroy(&baij->slvec1));
1012   PetscCall(VecDestroy(&baij->slvec1a));
1013   PetscCall(VecDestroy(&baij->slvec1b));
1014   PetscCall(VecScatterDestroy(&baij->sMvctx));
1015   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1016   PetscCall(PetscFree(baij->barray));
1017   PetscCall(PetscFree(baij->hd));
1018   PetscCall(VecDestroy(&baij->diag));
1019   PetscCall(VecDestroy(&baij->bb1));
1020   PetscCall(VecDestroy(&baij->xx1));
1021 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1022   PetscCall(PetscFree(baij->setvaluescopy));
1023 #endif
1024   PetscCall(PetscFree(baij->in_loc));
1025   PetscCall(PetscFree(baij->v_loc));
1026   PetscCall(PetscFree(baij->rangebs));
1027   PetscCall(PetscFree(mat->data));
1028 
1029   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1030   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1031   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1032   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1034 #if defined(PETSC_HAVE_ELEMENTAL)
1035   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1036 #endif
1037 #if defined(PETSC_HAVE_SCALAPACK)
1038   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1039 #endif
1040   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1041   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1042   PetscFunctionReturn(0);
1043 }
1044 
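/*
   Editor's note: rough outline of the two multiply routines below.  Only the upper triangular
   blocks are stored, so y = mat*x is assembled from
     slvec1a = A_loc*x            (local diagonal block, itself stored symmetrically)
     slvec0b = B^T*x (or B^H*x)   (what the local rows contribute to remote entries of y)
   the scatter sMvctx then adds slvec0 = [x ; slvec0b] into slvec1 = [slvec1a ; slvec1b], so
   slvec1a picks up the remote transpose contributions and slvec1b receives the ghost values
   of x, and finally y = B*slvec1b + slvec1a adds the off-diagonal columns.
*/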
1045 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1046 {
1047   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1048   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1049   PetscScalar       *from;
1050   const PetscScalar *x;
1051 
1052   PetscFunctionBegin;
1053   /* diagonal part */
1054   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1055   PetscCall(VecSet(a->slvec1b,0.0));
1056 
1057   /* subdiagonal part */
1058   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1059   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1060 
1061   /* copy x into the vec slvec0 */
1062   PetscCall(VecGetArray(a->slvec0,&from));
1063   PetscCall(VecGetArrayRead(xx,&x));
1064 
1065   PetscCall(PetscArraycpy(from,x,bs*mbs));
1066   PetscCall(VecRestoreArray(a->slvec0,&from));
1067   PetscCall(VecRestoreArrayRead(xx,&x));
1068 
1069   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1070   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1071   /* superdiagonal part */
1072   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1077 {
1078   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1079   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1080   PetscScalar       *from;
1081   const PetscScalar *x;
1082 
1083   PetscFunctionBegin;
1084   /* diagonal part */
1085   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1086   PetscCall(VecSet(a->slvec1b,0.0));
1087 
1088   /* subdiagonal part */
1089   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1090 
1091   /* copy x into the vec slvec0 */
1092   PetscCall(VecGetArray(a->slvec0,&from));
1093   PetscCall(VecGetArrayRead(xx,&x));
1094 
1095   PetscCall(PetscArraycpy(from,x,bs*mbs));
1096   PetscCall(VecRestoreArray(a->slvec0,&from));
1097   PetscCall(VecRestoreArrayRead(xx,&x));
1098 
1099   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1100   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1101   /* superdiagonal part */
1102   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1109   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1110   PetscScalar       *from,zero=0.0;
1111   const PetscScalar *x;
1112 
1113   PetscFunctionBegin;
1114   /* diagonal part */
1115   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1116   PetscCall(VecSet(a->slvec1b,zero));
1117 
1118   /* subdiagonal part */
1119   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1120   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1121 
1122   /* copy x into the vec slvec0 */
1123   PetscCall(VecGetArray(a->slvec0,&from));
1124   PetscCall(VecGetArrayRead(xx,&x));
1125   PetscCall(PetscArraycpy(from,x,bs*mbs));
1126   PetscCall(VecRestoreArray(a->slvec0,&from));
1127 
1128   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1129   PetscCall(VecRestoreArrayRead(xx,&x));
1130   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1131 
1132   /* superdiagonal part */
1133   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1138 {
1139   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1140   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1141   PetscScalar       *from,zero=0.0;
1142   const PetscScalar *x;
1143 
1144   PetscFunctionBegin;
1145   /* diagonal part */
1146   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1147   PetscCall(VecSet(a->slvec1b,zero));
1148 
1149   /* subdiagonal part */
1150   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1151 
1152   /* copy x into the vec slvec0 */
1153   PetscCall(VecGetArray(a->slvec0,&from));
1154   PetscCall(VecGetArrayRead(xx,&x));
1155   PetscCall(PetscArraycpy(from,x,bs*mbs));
1156   PetscCall(VecRestoreArray(a->slvec0,&from));
1157 
1158   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1159   PetscCall(VecRestoreArrayRead(xx,&x));
1160   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1161 
1162   /* superdiagonal part */
1163   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1164   PetscFunctionReturn(0);
1165 }
1166 
1167 /*
1168   This only works correctly for square matrices where the subblock A->A is the
1169    diagonal block
1170 */
1171 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1172 {
1173   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1174 
1175   PetscFunctionBegin;
1176   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1177   PetscCall(MatGetDiagonal(a->A,v));
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1182 {
1183   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1184 
1185   PetscFunctionBegin;
1186   PetscCall(MatScale(a->A,aa));
1187   PetscCall(MatScale(a->B,aa));
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1192 {
1193   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1194   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1195   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1196   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1197   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1198 
1199   PetscFunctionBegin;
1200   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1201   mat->getrowactive = PETSC_TRUE;
1202 
1203   if (!mat->rowvalues && (idx || v)) {
1204     /*
1205         allocate enough space to hold information from the longest row.
1206     */
1207     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1208     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1209     PetscInt     max = 1,mbs = mat->mbs,tmp;
1210     for (i=0; i<mbs; i++) {
1211       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1212       if (max < tmp) max = tmp;
1213     }
1214     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1215   }
1216 
1217   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1218   lrow = row - brstart;  /* local row index */
1219 
1220   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1221   if (!v)   {pvA = NULL; pvB = NULL;}
1222   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1223   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1224   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1225   nztot = nzA + nzB;
1226 
1227   cmap = mat->garray;
1228   if (v  || idx) {
1229     if (nztot) {
1230       /* Sort by increasing column numbers, assuming A and B already sorted */
1231       PetscInt imark = -1;
1232       if (v) {
1233         *v = v_p = mat->rowvalues;
1234         for (i=0; i<nzB; i++) {
1235           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1236           else break;
1237         }
1238         imark = i;
1239         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1240         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1241       }
1242       if (idx) {
1243         *idx = idx_p = mat->rowindices;
1244         if (imark > -1) {
1245           for (i=0; i<imark; i++) {
1246             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1247           }
1248         } else {
1249           for (i=0; i<nzB; i++) {
1250             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1251             else break;
1252           }
1253           imark = i;
1254         }
1255         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1256         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1257       }
1258     } else {
1259       if (idx) *idx = NULL;
1260       if (v)   *v   = NULL;
1261     }
1262   }
1263   *nz  = nztot;
1264   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1265   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1270 {
1271   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1272 
1273   PetscFunctionBegin;
1274   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1275   baij->getrowactive = PETSC_FALSE;
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1280 {
1281   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1282   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1283 
1284   PetscFunctionBegin;
1285   aA->getrow_utriangular = PETSC_TRUE;
1286   PetscFunctionReturn(0);
1287 }
1288 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1289 {
1290   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1291   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1292 
1293   PetscFunctionBegin;
1294   aA->getrow_utriangular = PETSC_FALSE;
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1299 {
1300   PetscFunctionBegin;
1301   if (PetscDefined(USE_COMPLEX)) {
1302     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1303 
1304     PetscCall(MatConjugate(a->A));
1305     PetscCall(MatConjugate(a->B));
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313 
1314   PetscFunctionBegin;
1315   PetscCall(MatRealPart(a->A));
1316   PetscCall(MatRealPart(a->B));
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1321 {
1322   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1323 
1324   PetscFunctionBegin;
1325   PetscCall(MatImaginaryPart(a->A));
1326   PetscCall(MatImaginaryPart(a->B));
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1331    Input: isrow       - distributed (parallel),
1332           iscol_local - locally owned (seq)
1333 */
1334 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1335 {
1336   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1337   const PetscInt *ptr1,*ptr2;
1338 
1339   PetscFunctionBegin;
1340   PetscCall(ISGetLocalSize(isrow,&sz1));
1341   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1342   if (sz1 > sz2) {
1343     *flg = PETSC_FALSE;
1344     PetscFunctionReturn(0);
1345   }
1346 
1347   PetscCall(ISGetIndices(isrow,&ptr1));
1348   PetscCall(ISGetIndices(iscol_local,&ptr2));
1349 
1350   PetscCall(PetscMalloc1(sz1,&a1));
1351   PetscCall(PetscMalloc1(sz2,&a2));
1352   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1353   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1354   PetscCall(PetscSortInt(sz1,a1));
1355   PetscCall(PetscSortInt(sz2,a2));
1356 
1357   nmatch=0;
1358   k     = 0;
1359   for (i=0; i<sz1; i++) {
1360     for (j=k; j<sz2; j++) {
1361       if (a1[i] == a2[j]) {
1362         k = j; nmatch++;
1363         break;
1364       }
1365     }
1366   }
1367   PetscCall(ISRestoreIndices(isrow,&ptr1));
1368   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1369   PetscCall(PetscFree(a1));
1370   PetscCall(PetscFree(a2));
1371   if (nmatch < sz1) {
1372     *flg = PETSC_FALSE;
1373   } else {
1374     *flg = PETSC_TRUE;
1375   }
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1380 {
1381   IS             iscol_local;
1382   PetscInt       csize;
1383   PetscBool      isequal;
1384 
1385   PetscFunctionBegin;
1386   PetscCall(ISGetLocalSize(iscol,&csize));
1387   if (call == MAT_REUSE_MATRIX) {
1388     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1389     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1390   } else {
1391     PetscBool issorted;
1392 
1393     PetscCall(ISAllGather(iscol,&iscol_local));
1394     PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
1395     PetscCall(ISSorted(iscol_local, &issorted));
1396     PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1397   }
1398 
1399   /* now call MatCreateSubMatrix_MPIBAIJ() */
1400   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1401   if (call == MAT_INITIAL_MATRIX) {
1402     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1403     PetscCall(ISDestroy(&iscol_local));
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1409 {
1410   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1411 
1412   PetscFunctionBegin;
1413   PetscCall(MatZeroEntries(l->A));
1414   PetscCall(MatZeroEntries(l->B));
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1419 {
1420   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1421   Mat            A  = a->A,B = a->B;
1422   PetscLogDouble isend[5],irecv[5];
1423 
1424   PetscFunctionBegin;
1425   info->block_size = (PetscReal)matin->rmap->bs;
1426 
1427   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1428 
1429   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1430   isend[3] = info->memory;  isend[4] = info->mallocs;
1431 
1432   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1433 
1434   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1435   isend[3] += info->memory;  isend[4] += info->mallocs;
1436   if (flag == MAT_LOCAL) {
1437     info->nz_used      = isend[0];
1438     info->nz_allocated = isend[1];
1439     info->nz_unneeded  = isend[2];
1440     info->memory       = isend[3];
1441     info->mallocs      = isend[4];
1442   } else if (flag == MAT_GLOBAL_MAX) {
1443     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1444 
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else if (flag == MAT_GLOBAL_SUM) {
1451     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1459   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1460   info->fill_ratio_needed = 0;
1461   info->factor_mallocs    = 0;
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1466 {
1467   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1468   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1469 
1470   PetscFunctionBegin;
1471   switch (op) {
1472   case MAT_NEW_NONZERO_LOCATIONS:
1473   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1474   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1475   case MAT_KEEP_NONZERO_PATTERN:
1476   case MAT_SUBMAT_SINGLEIS:
1477   case MAT_NEW_NONZERO_LOCATION_ERR:
1478     MatCheckPreallocated(A,1);
1479     PetscCall(MatSetOption(a->A,op,flg));
1480     PetscCall(MatSetOption(a->B,op,flg));
1481     break;
1482   case MAT_ROW_ORIENTED:
1483     MatCheckPreallocated(A,1);
1484     a->roworiented = flg;
1485 
1486     PetscCall(MatSetOption(a->A,op,flg));
1487     PetscCall(MatSetOption(a->B,op,flg));
1488     break;
1489   case MAT_FORCE_DIAGONAL_ENTRIES:
1490   case MAT_SORTED_FULL:
1491     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1492     break;
1493   case MAT_IGNORE_OFF_PROC_ENTRIES:
1494     a->donotstash = flg;
1495     break;
1496   case MAT_USE_HASH_TABLE:
1497     a->ht_flag = flg;
1498     break;
1499   case MAT_HERMITIAN:
1500     MatCheckPreallocated(A,1);
1501     PetscCall(MatSetOption(a->A,op,flg));
1502 #if defined(PETSC_USE_COMPLEX)
1503     if (flg) { /* need different mat-vec ops */
1504       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1505       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1506       A->ops->multtranspose    = NULL;
1507       A->ops->multtransposeadd = NULL;
1508       A->symmetric = PETSC_FALSE;
1509     }
1510 #endif
1511     break;
1512   case MAT_SPD:
1513   case MAT_SYMMETRIC:
1514     MatCheckPreallocated(A,1);
1515     PetscCall(MatSetOption(a->A,op,flg));
1516 #if defined(PETSC_USE_COMPLEX)
1517     if (flg) { /* restore to use default mat-vec ops */
1518       A->ops->mult             = MatMult_MPISBAIJ;
1519       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1520       A->ops->multtranspose    = MatMult_MPISBAIJ;
1521       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1522     }
1523 #endif
1524     break;
1525   case MAT_STRUCTURALLY_SYMMETRIC:
1526     MatCheckPreallocated(A,1);
1527     PetscCall(MatSetOption(a->A,op,flg));
1528     break;
1529   case MAT_SYMMETRY_ETERNAL:
1530     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1531     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1532     break;
1533   case MAT_IGNORE_LOWER_TRIANGULAR:
1534     aA->ignore_ltriangular = flg;
1535     break;
1536   case MAT_ERROR_LOWER_TRIANGULAR:
1537     aA->ignore_ltriangular = flg;
1538     break;
1539   case MAT_GETROW_UPPERTRIANGULAR:
1540     aA->getrow_utriangular = flg;
1541     break;
1542   default:
1543     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",(int)op);
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1549 {
1550   PetscFunctionBegin;
1551   if (reuse == MAT_INITIAL_MATRIX) {
1552     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1553   }  else if (reuse == MAT_REUSE_MATRIX) {
1554     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1560 {
1561   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1562   Mat            a     = baij->A, b=baij->B;
1563   PetscInt       nv,m,n;
1564   PetscBool      flg;
1565 
1566   PetscFunctionBegin;
1567   if (ll != rr) {
1568     PetscCall(VecEqual(ll,rr,&flg));
1569     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1570   }
1571   if (!ll) PetscFunctionReturn(0);
1572 
1573   PetscCall(MatGetLocalSize(mat,&m,&n));
1574   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same",m,n);
1575 
1576   PetscCall(VecGetLocalSize(rr,&nv));
1577   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1578 
1579   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1580 
1581   /* left diagonalscale the off-diagonal part */
1582   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1583 
1584   /* scale the diagonal part */
1585   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1586 
1587   /* right diagonalscale the off-diagonal part */
1588   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1589   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1594 {
1595   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1596 
1597   PetscFunctionBegin;
1598   PetscCall(MatSetUnfactored(a->A));
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1603 
1604 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1605 {
1606   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1607   Mat            a,b,c,d;
1608   PetscBool      flg;
1609 
1610   PetscFunctionBegin;
1611   a = matA->A; b = matA->B;
1612   c = matB->A; d = matB->B;
1613 
1614   PetscCall(MatEqual(a,c,&flg));
1615   if (flg) {
1616     PetscCall(MatEqual(b,d,&flg));
1617   }
1618   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1623 {
1624   PetscBool      isbaij;
1625 
1626   PetscFunctionBegin;
1627   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1628   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1629   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1630   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1631     PetscCall(MatGetRowUpperTriangular(A));
1632     PetscCall(MatCopy_Basic(A,B,str));
1633     PetscCall(MatRestoreRowUpperTriangular(A));
1634   } else {
1635     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1636     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1637 
1638     PetscCall(MatCopy(a->A,b->A,str));
1639     PetscCall(MatCopy(a->B,b->B,str));
1640   }
1641   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1642   PetscFunctionReturn(0);
1643 }
1644 
1645 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1646 {
1647   PetscFunctionBegin;
1648   PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1653 {
1654   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1655   PetscBLASInt   bnz,one=1;
1656   Mat_SeqSBAIJ   *xa,*ya;
1657   Mat_SeqBAIJ    *xb,*yb;
1658 
1659   PetscFunctionBegin;
1660   if (str == SAME_NONZERO_PATTERN) {
1661     PetscScalar alpha = a;
1662     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1663     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1664     PetscCall(PetscBLASIntCast(xa->nz,&bnz));
1665     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1666     xb   = (Mat_SeqBAIJ*)xx->B->data;
1667     yb   = (Mat_SeqBAIJ*)yy->B->data;
1668     PetscCall(PetscBLASIntCast(xb->nz,&bnz));
1669     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1670     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1671   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1672     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
1673     PetscCall(MatAXPY_Basic(Y,a,X,str));
1674     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
1675   } else {
1676     Mat      B;
1677     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1678     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1679     PetscCall(MatGetRowUpperTriangular(X));
1680     PetscCall(MatGetRowUpperTriangular(Y));
1681     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1682     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1683     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1684     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1685     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1686     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1687     PetscCall(MatSetType(B,MATMPISBAIJ));
1688     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1689     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1690     PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1691     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1692     PetscCall(MatHeaderMerge(Y,&B));
1693     PetscCall(PetscFree(nnz_d));
1694     PetscCall(PetscFree(nnz_o));
1695     PetscCall(MatRestoreRowUpperTriangular(X));
1696     PetscCall(MatRestoreRowUpperTriangular(Y));
1697   }
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1702 {
1703   PetscInt       i;
1704   PetscBool      flg;
1705 
1706   PetscFunctionBegin;
1707   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1708   for (i=0; i<n; i++) {
1709     PetscCall(ISEqual(irow[i],icol[i],&flg));
1710     if (!flg) {
1711       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1712     }
1713   }
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1718 {
1719   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1720   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1721 
1722   PetscFunctionBegin;
1723   if (!Y->preallocated) {
1724     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1725   } else if (!aij->nz) {
1726     PetscInt nonew = aij->nonew;
1727     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1728     aij->nonew = nonew;
1729   }
1730   PetscCall(MatShift_Basic(Y,a));
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1735 {
1736   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1737 
1738   PetscFunctionBegin;
1739   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1740   PetscCall(MatMissingDiagonal(a->A,missing,d));
1741   if (d) {
1742     PetscInt rstart;
1743     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1744     *d += rstart/A->rmap->bs;
1745 
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 
1750 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1751 {
1752   PetscFunctionBegin;
1753   *a = ((Mat_MPISBAIJ*)A->data)->A;
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 /* -------------------------------------------------------------------*/
1758 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1759                                        MatGetRow_MPISBAIJ,
1760                                        MatRestoreRow_MPISBAIJ,
1761                                        MatMult_MPISBAIJ,
1762                                /*  4*/ MatMultAdd_MPISBAIJ,
1763                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1764                                        MatMultAdd_MPISBAIJ,
1765                                        NULL,
1766                                        NULL,
1767                                        NULL,
1768                                /* 10*/ NULL,
1769                                        NULL,
1770                                        NULL,
1771                                        MatSOR_MPISBAIJ,
1772                                        MatTranspose_MPISBAIJ,
1773                                /* 15*/ MatGetInfo_MPISBAIJ,
1774                                        MatEqual_MPISBAIJ,
1775                                        MatGetDiagonal_MPISBAIJ,
1776                                        MatDiagonalScale_MPISBAIJ,
1777                                        MatNorm_MPISBAIJ,
1778                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1779                                        MatAssemblyEnd_MPISBAIJ,
1780                                        MatSetOption_MPISBAIJ,
1781                                        MatZeroEntries_MPISBAIJ,
1782                                /* 24*/ NULL,
1783                                        NULL,
1784                                        NULL,
1785                                        NULL,
1786                                        NULL,
1787                                /* 29*/ MatSetUp_MPISBAIJ,
1788                                        NULL,
1789                                        NULL,
1790                                        MatGetDiagonalBlock_MPISBAIJ,
1791                                        NULL,
1792                                /* 34*/ MatDuplicate_MPISBAIJ,
1793                                        NULL,
1794                                        NULL,
1795                                        NULL,
1796                                        NULL,
1797                                /* 39*/ MatAXPY_MPISBAIJ,
1798                                        MatCreateSubMatrices_MPISBAIJ,
1799                                        MatIncreaseOverlap_MPISBAIJ,
1800                                        MatGetValues_MPISBAIJ,
1801                                        MatCopy_MPISBAIJ,
1802                                /* 44*/ NULL,
1803                                        MatScale_MPISBAIJ,
1804                                        MatShift_MPISBAIJ,
1805                                        NULL,
1806                                        NULL,
1807                                /* 49*/ NULL,
1808                                        NULL,
1809                                        NULL,
1810                                        NULL,
1811                                        NULL,
1812                                /* 54*/ NULL,
1813                                        NULL,
1814                                        MatSetUnfactored_MPISBAIJ,
1815                                        NULL,
1816                                        MatSetValuesBlocked_MPISBAIJ,
1817                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1818                                        NULL,
1819                                        NULL,
1820                                        NULL,
1821                                        NULL,
1822                                /* 64*/ NULL,
1823                                        NULL,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1828                                        NULL,
1829                                        MatConvert_MPISBAIJ_Basic,
1830                                        NULL,
1831                                        NULL,
1832                                /* 74*/ NULL,
1833                                        NULL,
1834                                        NULL,
1835                                        NULL,
1836                                        NULL,
1837                                /* 79*/ NULL,
1838                                        NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        MatLoad_MPISBAIJ,
1842                                /* 84*/ NULL,
1843                                        NULL,
1844                                        NULL,
1845                                        NULL,
1846                                        NULL,
1847                                /* 89*/ NULL,
1848                                        NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                /* 94*/ NULL,
1853                                        NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                /* 99*/ NULL,
1858                                        NULL,
1859                                        NULL,
1860                                        MatConjugate_MPISBAIJ,
1861                                        NULL,
1862                                /*104*/ NULL,
1863                                        MatRealPart_MPISBAIJ,
1864                                        MatImaginaryPart_MPISBAIJ,
1865                                        MatGetRowUpperTriangular_MPISBAIJ,
1866                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1867                                /*109*/ NULL,
1868                                        NULL,
1869                                        NULL,
1870                                        NULL,
1871                                        MatMissingDiagonal_MPISBAIJ,
1872                                /*114*/ NULL,
1873                                        NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        NULL,
1877                                /*119*/ NULL,
1878                                        NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                /*124*/ NULL,
1883                                        NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                /*129*/ NULL,
1888                                        NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                /*134*/ NULL,
1893                                        NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                /*139*/ MatSetBlockSizes_Default,
1898                                        NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1903                                        NULL,
1904                                        NULL,
1905                                        NULL
1906 };
1907 
1908 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1909 {
1910   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1911   PetscInt       i,mbs,Mbs;
1912   PetscMPIInt    size;
1913 
1914   PetscFunctionBegin;
1915   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1916   PetscCall(PetscLayoutSetUp(B->rmap));
1917   PetscCall(PetscLayoutSetUp(B->cmap));
1918   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1919   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1920   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1921 
1922   mbs = B->rmap->n/bs;
1923   Mbs = B->rmap->N/bs;
1924   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1925 
1926   B->rmap->bs = bs;
1927   b->bs2      = bs*bs;
1928   b->mbs      = mbs;
1929   b->Mbs      = Mbs;
1930   b->nbs      = B->cmap->n/bs;
1931   b->Nbs      = B->cmap->N/bs;
1932 
1933   for (i=0; i<=b->size; i++) {
1934     b->rangebs[i] = B->rmap->range[i]/bs;
1935   }
1936   b->rstartbs = B->rmap->rstart/bs;
1937   b->rendbs   = B->rmap->rend/bs;
1938 
1939   b->cstartbs = B->cmap->rstart/bs;
1940   b->cendbs   = B->cmap->rend/bs;
1941 
1942 #if defined(PETSC_USE_CTABLE)
1943   PetscCall(PetscTableDestroy(&b->colmap));
1944 #else
1945   PetscCall(PetscFree(b->colmap));
1946 #endif
1947   PetscCall(PetscFree(b->garray));
1948   PetscCall(VecDestroy(&b->lvec));
1949   PetscCall(VecScatterDestroy(&b->Mvctx));
1950   PetscCall(VecDestroy(&b->slvec0));
1951   PetscCall(VecDestroy(&b->slvec0b));
1952   PetscCall(VecDestroy(&b->slvec1));
1953   PetscCall(VecDestroy(&b->slvec1a));
1954   PetscCall(VecDestroy(&b->slvec1b));
1955   PetscCall(VecScatterDestroy(&b->sMvctx));
1956 
1957   /* Because b->B will have been resized we simply destroy it and create a new one each time */
1958   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1959   PetscCall(MatDestroy(&b->B));
1960   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1961   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1962   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1963   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1964 
1965   if (!B->preallocated) {
1966     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1967     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1968     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1969     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1970     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1971   }
1972 
1973   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1974   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1975 
1976   B->preallocated  = PETSC_TRUE;
1977   B->was_assembled = PETSC_FALSE;
1978   B->assembled     = PETSC_FALSE;
1979   PetscFunctionReturn(0);
1980 }
1981 
1982 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1983 {
1984   PetscInt       m,rstart,cend;
1985   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1986   const PetscInt *JJ    =NULL;
1987   PetscScalar    *values=NULL;
1988   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1989   PetscBool      nooffprocentries;
1990 
1991   PetscFunctionBegin;
1992   PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
1993   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
1994   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
1995   PetscCall(PetscLayoutSetUp(B->rmap));
1996   PetscCall(PetscLayoutSetUp(B->cmap));
1997   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1998   m      = B->rmap->n/bs;
1999   rstart = B->rmap->rstart/bs;
2000   cend   = B->cmap->rend/bs;
2001 
2002   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2003   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2004   for (i=0; i<m; i++) {
2005     nz = ii[i+1] - ii[i];
2006     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2007     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2008     JJ     = jj + ii[i];
2009     bd     = 0;
2010     for (j=0; j<nz; j++) {
2011       if (*JJ >= i + rstart) break;
2012       JJ++;
2013       bd++;
2014     }
2015     d  = 0;
2016     for (; j<nz; j++) {
2017       if (*JJ++ >= cend) break;
2018       d++;
2019     }
2020     d_nnz[i] = d;
2021     o_nnz[i] = nz - d - bd;
2022     nz       = nz - bd;
2023     nz_max = PetscMax(nz_max,nz);
2024   }
2025   PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2026   PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2027   PetscCall(PetscFree2(d_nnz,o_nnz));
2028 
2029   values = (PetscScalar*)V;
2030   if (!values) {
2031     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2032   }
2033   for (i=0; i<m; i++) {
2034     PetscInt          row    = i + rstart;
2035     PetscInt          ncols  = ii[i+1] - ii[i];
2036     const PetscInt    *icols = jj + ii[i];
2037     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2038       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2039       PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2040     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2041       PetscInt j;
2042       for (j=0; j<ncols; j++) {
2043         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2044         PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2045       }
2046     }
2047   }
2048 
2049   if (!V) PetscCall(PetscFree(values));
2050   nooffprocentries    = B->nooffprocentries;
2051   B->nooffprocentries = PETSC_TRUE;
2052   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2053   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2054   B->nooffprocentries = nooffprocentries;
2055 
2056   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 /*MC
2061    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2062    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2063    the matrix is stored.
2064 
2065    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2066    can call MatSetOption(Mat, MAT_HERMITIAN);
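
   For example, a minimal sketch (error handling omitted; A is an illustrative, already preallocated MATMPISBAIJ matrix):
.vb
   MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE);
.ve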
2067 
2068    Options Database Keys:
2069 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2070 
2071    Notes:
2072      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2073      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
2074 
2075    Level: beginner
2076 
2077 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2078 M*/
2079 
2080 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2081 {
2082   Mat_MPISBAIJ   *b;
2083   PetscBool      flg = PETSC_FALSE;
2084 
2085   PetscFunctionBegin;
2086   PetscCall(PetscNewLog(B,&b));
2087   B->data = (void*)b;
2088   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2089 
2090   B->ops->destroy = MatDestroy_MPISBAIJ;
2091   B->ops->view    = MatView_MPISBAIJ;
2092   B->assembled    = PETSC_FALSE;
2093   B->insertmode   = NOT_SET_VALUES;
2094 
2095   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2096   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2097 
2098   /* build local table of row and column ownerships */
2099   PetscCall(PetscMalloc1(b->size+2,&b->rangebs));
2100 
2101   /* build cache for off array entries formed */
2102   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2103 
2104   b->donotstash  = PETSC_FALSE;
2105   b->colmap      = NULL;
2106   b->garray      = NULL;
2107   b->roworiented = PETSC_TRUE;
2108 
2109   /* stuff used in block assembly */
2110   b->barray = NULL;
2111 
2112   /* stuff used for matrix vector multiply */
2113   b->lvec    = NULL;
2114   b->Mvctx   = NULL;
2115   b->slvec0  = NULL;
2116   b->slvec0b = NULL;
2117   b->slvec1  = NULL;
2118   b->slvec1a = NULL;
2119   b->slvec1b = NULL;
2120   b->sMvctx  = NULL;
2121 
2122   /* stuff for MatGetRow() */
2123   b->rowindices   = NULL;
2124   b->rowvalues    = NULL;
2125   b->getrowactive = PETSC_FALSE;
2126 
2127   /* hash table stuff */
2128   b->ht           = NULL;
2129   b->hd           = NULL;
2130   b->ht_size      = 0;
2131   b->ht_flag      = PETSC_FALSE;
2132   b->ht_fact      = 0;
2133   b->ht_total_ct  = 0;
2134   b->ht_insert_ct = 0;
2135 
2136   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2137   b->ijonly = PETSC_FALSE;
2138 
2139   b->in_loc = NULL;
2140   b->v_loc  = NULL;
2141   b->n_loc  = 0;
2142 
2143   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
2144   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
2145   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2147 #if defined(PETSC_HAVE_ELEMENTAL)
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
2149 #endif
2150 #if defined(PETSC_HAVE_SCALAPACK)
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
2152 #endif
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));
2155 
2156   B->symmetric                  = PETSC_TRUE;
2157   B->structurally_symmetric     = PETSC_TRUE;
2158   B->symmetric_set              = PETSC_TRUE;
2159   B->structurally_symmetric_set = PETSC_TRUE;
2160   B->symmetric_eternal          = PETSC_TRUE;
2161 #if defined(PETSC_USE_COMPLEX)
2162   B->hermitian                  = PETSC_FALSE;
2163   B->hermitian_set              = PETSC_FALSE;
2164 #else
2165   B->hermitian                  = PETSC_TRUE;
2166   B->hermitian_set              = PETSC_TRUE;
2167 #endif
2168 
2169   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
2170   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2171   PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
2172   if (flg) {
2173     PetscReal fact = 1.39;
2174     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2175     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2176     if (fact <= 1.0) fact = 1.39;
2177     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2178     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2179   }
2180   PetscOptionsEnd();
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 /*MC
2185    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2186 
2187    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2188    and MATMPISBAIJ otherwise.
2189 
2190    Options Database Keys:
2191 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2192 
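   A minimal creation sketch (variable names and sizes are illustrative; both preallocation calls may be made because the one that does not match the actual type has no effect):
.vb
   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetType(A, MATSBAIJ);                                     /* or select with -mat_type sbaij via MatSetFromOptions() */
   MatSetFromOptions(A);
   MatSeqSBAIJSetPreallocation(A, bs, d_nz, NULL);              /* takes effect on one process */
   MatMPISBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);  /* takes effect on more than one process */
.ve
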
2193   Level: beginner
2194 
2195 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2196 M*/
2197 
2198 /*@C
2199    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2200    the user should preallocate the matrix storage by setting the parameters
2201    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2202    performance can be increased by more than a factor of 50.
2203 
2204    Collective on Mat
2205 
2206    Input Parameters:
2207 +  B - the matrix
2208 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2209           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2210 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2211            submatrix  (same for all local rows)
2212 .  d_nnz - array containing the number of block nonzeros in the various block rows
2213            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2214            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2215            for the diagonal entry and set a value even if it is zero.
2216 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2217            submatrix (same for all local rows).
2218 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2219            off-diagonal portion of the local submatrix that is right of the diagonal
2220            (possibly different for each block row) or NULL.
2221 
2222    Options Database Keys:
2223 +   -mat_no_unroll - uses code that does not unroll the loops in the
2224                      block calculations (much slower)
2225 -   -mat_block_size - size of the blocks to use
2226 
2227    Notes:
2228 
2229    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2230    then it must be used on all processors that share the object for that argument.
2231 
2232    If the *_nnz parameter is given then the *_nz parameter is ignored
2233 
2234    Storage Information:
2235    For a square global matrix we define each processor's diagonal portion
2236    to be its local rows and the corresponding columns (a square submatrix);
2237    each processor's off-diagonal portion encompasses the remainder of the
2238    local matrix (a rectangular submatrix).
2239 
2240    The user can specify preallocated storage for the diagonal part of
2241    the local submatrix with either d_nz or d_nnz (not both).  Set
2242    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2243    memory allocation.  Likewise, specify preallocated storage for the
2244    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2245 
2246    You can call MatGetInfo() to get information on how effective the preallocation was;
2247    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2248    You can also run with the option -info and look for messages with the string
2249    malloc in them to see if additional memory allocation was needed.
2250 
2251    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2252    the figure below we depict these three local rows and all columns (0-11).
2253 
2254 .vb
2255            0 1 2 3 4 5 6 7 8 9 10 11
2256           --------------------------
2257    row 3  |. . . d d d o o o o  o  o
2258    row 4  |. . . d d d o o o o  o  o
2259    row 5  |. . . d d d o o o o  o  o
2260           --------------------------
2261 .ve
2262 
2263    Thus, any entries in the d locations are stored in the d (diagonal)
2264    submatrix, and any entries in the o locations are stored in the
2265    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2266    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2267 
2268    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2269    plus the diagonal part of the d matrix,
2270    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2271 
2272    In general, for PDE problems in which most nonzeros are near the diagonal,
2273    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2274    or you will get TERRIBLE performance; see the users' manual chapter on
2275    matrices.
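
   As an illustration only, if every d and o location in the figure above were nonzero and bs were 1, the owning
   process could preallocate its three local rows (3, 4 and 5) with
.vb
   PetscInt d_nnz[] = {3, 2, 1};   /* upper triangular (including diagonal) entries in rows 3, 4, 5 */
   PetscInt o_nnz[] = {6, 6, 6};   /* entries in columns 6 through 11 of rows 3, 4, 5 */
   MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz);
.ve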
2276 
2277    Level: intermediate
2278 
2279 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2280 @*/
2281 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2282 {
2283   PetscFunctionBegin;
2284   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2285   PetscValidType(B,1);
2286   PetscValidLogicalCollectiveInt(B,bs,2);
2287   PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 /*@C
2292    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2293    (block compressed row).  For good matrix assembly performance
2294    the user should preallocate the matrix storage by setting the parameters
2295    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2296    performance can be increased by more than a factor of 50.
2297 
2298    Collective
2299 
2300    Input Parameters:
2301 +  comm - MPI communicator
2302 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2303           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2304 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2305            This value should be the same as the local size used in creating the
2306            y vector for the matrix-vector product y = Ax.
2307 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2308            This value should be the same as the local size used in creating the
2309            x vector for the matrix-vector product y = Ax.
2310 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2311 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2312 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2313            submatrix  (same for all local rows)
2314 .  d_nnz - array containing the number of block nonzeros in the various block rows
2315            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2316            (possibly different for each block row) or NULL.
2317            If you plan to factor the matrix you must leave room for the diagonal entry and
2318            set its value even if it is zero.
2319 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2320            submatrix (same for all local rows).
2321 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2322            off-diagonal portion of the local submatrix (possibly different for
2323            each block row) or NULL.
2324 
2325    Output Parameter:
2326 .  A - the matrix
2327 
2328    Options Database Keys:
2329 +   -mat_no_unroll - uses code that does not unroll the loops in the
2330                      block calculations (much slower)
2331 .   -mat_block_size - size of the blocks to use
2332 -   -mat_mpi - use the parallel matrix data structures even on one processor
2333                (defaults to using SeqSBAIJ format on one processor)
2334 
2335    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2336    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2337    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2338 
2339    Notes:
2340    The number of rows and columns must be divisible by blocksize.
2341    This matrix type does not support complex Hermitian operations.
2342 
2343    The user MUST specify either the local or global matrix dimensions
2344    (possibly both).
2345 
2346    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2347    then it must be used on all processors that share the object for that argument.
2348 
2349    If the *_nnz parameter is given then the *_nz parameter is ignored
2350 
2351    Storage Information:
2352    For a square global matrix we define each processor's diagonal portion
2353    to be its local rows and the corresponding columns (a square submatrix);
2354    each processor's off-diagonal portion encompasses the remainder of the
2355    local matrix (a rectangular submatrix).
2356 
2357    The user can specify preallocated storage for the diagonal part of
2358    the local submatrix with either d_nz or d_nnz (not both).  Set
2359    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2360    memory allocation.  Likewise, specify preallocated storage for the
2361    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2362 
2363    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2364    the figure below we depict these three local rows and all columns (0-11).
2365 
2366 .vb
2367            0 1 2 3 4 5 6 7 8 9 10 11
2368           --------------------------
2369    row 3  |. . . d d d o o o o  o  o
2370    row 4  |. . . d d d o o o o  o  o
2371    row 5  |. . . d d d o o o o  o  o
2372           --------------------------
2373 .ve
2374 
2375    Thus, any entries in the d locations are stored in the d (diagonal)
2376    submatrix, and any entries in the o locations are stored in the
2377    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2378    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2379 
2380    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2381    plus the diagonal part of the d matrix,
2382    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2383    In general, for PDE problems in which most nonzeros are near the diagonal,
2384    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2385    or you will get TERRIBLE performance; see the users' manual chapter on
2386    matrices.
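
   A sketch using the same illustrative counts as the figure above (bs = 1, three dense local rows, global sizes
   determined from the local sizes):
.vb
   PetscInt d_nnz[] = {3, 2, 1};
   PetscInt o_nnz[] = {6, 6, 6};
   MatCreateSBAIJ(PETSC_COMM_WORLD, 1, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A);
.ve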
2387 
2388    Level: intermediate
2389 
2390 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2391 @*/
2392 
2393 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2394 {
2395   PetscMPIInt    size;
2396 
2397   PetscFunctionBegin;
2398   PetscCall(MatCreate(comm,A));
2399   PetscCall(MatSetSizes(*A,m,n,M,N));
2400   PetscCallMPI(MPI_Comm_size(comm,&size));
2401   if (size > 1) {
2402     PetscCall(MatSetType(*A,MATMPISBAIJ));
2403     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2404   } else {
2405     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2406     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2407   }
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2412 {
2413   Mat            mat;
2414   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2415   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2416   PetscScalar    *array;
2417 
2418   PetscFunctionBegin;
2419   *newmat = NULL;
2420 
2421   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2422   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2423   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2424   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2425   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2426 
2427   mat->factortype   = matin->factortype;
2428   mat->preallocated = PETSC_TRUE;
2429   mat->assembled    = PETSC_TRUE;
2430   mat->insertmode   = NOT_SET_VALUES;
2431 
2432   a      = (Mat_MPISBAIJ*)mat->data;
2433   a->bs2 = oldmat->bs2;
2434   a->mbs = oldmat->mbs;
2435   a->nbs = oldmat->nbs;
2436   a->Mbs = oldmat->Mbs;
2437   a->Nbs = oldmat->Nbs;
2438 
2439   a->size         = oldmat->size;
2440   a->rank         = oldmat->rank;
2441   a->donotstash   = oldmat->donotstash;
2442   a->roworiented  = oldmat->roworiented;
2443   a->rowindices   = NULL;
2444   a->rowvalues    = NULL;
2445   a->getrowactive = PETSC_FALSE;
2446   a->barray       = NULL;
2447   a->rstartbs     = oldmat->rstartbs;
2448   a->rendbs       = oldmat->rendbs;
2449   a->cstartbs     = oldmat->cstartbs;
2450   a->cendbs       = oldmat->cendbs;
2451 
2452   /* hash table stuff */
2453   a->ht           = NULL;
2454   a->hd           = NULL;
2455   a->ht_size      = 0;
2456   a->ht_flag      = oldmat->ht_flag;
2457   a->ht_fact      = oldmat->ht_fact;
2458   a->ht_total_ct  = 0;
2459   a->ht_insert_ct = 0;
2460 
2461   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
2462   if (oldmat->colmap) {
2463 #if defined(PETSC_USE_CTABLE)
2464     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2465 #else
2466     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
2467     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
2468     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
2469 #endif
2470   } else a->colmap = NULL;
2471 
2472   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2473     PetscCall(PetscMalloc1(len,&a->garray));
2474     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2475     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2476   } else a->garray = NULL;
2477 
2478   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
2479   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2480   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2481   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2482   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2483 
2484   PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
2485   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2486   PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
2487   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2488 
2489   PetscCall(VecGetLocalSize(a->slvec1,&nt));
2490   PetscCall(VecGetArray(a->slvec1,&array));
2491   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
2492   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
2493   PetscCall(VecRestoreArray(a->slvec1,&array));
2494   PetscCall(VecGetArray(a->slvec0,&array));
2495   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
2496   PetscCall(VecRestoreArray(a->slvec0,&array));
2497   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2498   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2499   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
2500   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
2501   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));
2502 
2503   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2504   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2505   a->sMvctx = oldmat->sMvctx;
2506   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));
2507 
2508   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2509   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2510   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2511   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2512   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2513   *newmat = mat;
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2518 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2519 
2520 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2521 {
2522   PetscBool      isbinary;
2523 
2524   PetscFunctionBegin;
2525   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2526   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2527   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /*XXXXX@
2532    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2533 
2534    Input Parameters:
2535 .  mat  - the matrix
2536 .  fact - factor
2537 
2538    Not Collective on Mat, each process can have a different hash factor
2539 
2540    Level: advanced
2541 
2542   Notes:
2543    This can also be set by the command line option: -mat_use_hash_table fact
2544 
2545 .seealso: `MatSetOption()`
2546 @XXXXX*/
2547 
2548 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2549 {
2550   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2551   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2552   PetscReal      atmp;
2553   PetscReal      *work,*svalues,*rvalues;
2554   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2555   PetscMPIInt    rank,size;
2556   PetscInt       *rowners_bs,dest,count,source;
2557   PetscScalar    *va;
2558   MatScalar      *ba;
2559   MPI_Status     stat;
2560 
2561   PetscFunctionBegin;
2562   PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2563   PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
2564   PetscCall(VecGetArray(v,&va));
2565 
2566   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2567   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2568 
2569   bs  = A->rmap->bs;
2570   mbs = a->mbs;
2571   Mbs = a->Mbs;
2572   ba  = b->a;
2573   bi  = b->i;
2574   bj  = b->j;
2575 
2576   /* find ownerships */
2577   rowners_bs = A->rmap->range;
2578 
2579   /* each proc creates an array to be distributed */
2580   PetscCall(PetscCalloc1(bs*Mbs,&work));
2581 
2582   /* row_max for B */
2583   if (rank != size-1) {
2584     for (i=0; i<mbs; i++) {
2585       ncols = bi[1] - bi[0]; bi++;
2586       brow  = bs*i;
2587       for (j=0; j<ncols; j++) {
2588         bcol = bs*(*bj);
2589         for (kcol=0; kcol<bs; kcol++) {
2590           col  = bcol + kcol;                /* local col index */
2591           col += rowners_bs[rank+1];      /* global col index */
2592           for (krow=0; krow<bs; krow++) {
2593             atmp = PetscAbsScalar(*ba); ba++;
2594             row  = brow + krow;   /* local row index */
2595             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2596             if (work[col] < atmp) work[col] = atmp;
2597           }
2598         }
2599         bj++;
2600       }
2601     }
2602 
2603     /* send values to its owners */
2604     for (dest=rank+1; dest<size; dest++) {
2605       svalues = work + rowners_bs[dest];
2606       count   = rowners_bs[dest+1]-rowners_bs[dest];
2607       PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
2608     }
2609   }
2610 
2611   /* receive values */
2612   if (rank) {
2613     rvalues = work;
2614     count   = rowners_bs[rank+1]-rowners_bs[rank];
2615     for (source=0; source<rank; source++) {
2616       PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
2617       /* process values */
2618       for (i=0; i<count; i++) {
2619         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2620       }
2621     }
2622   }
2623 
2624   PetscCall(VecRestoreArray(v,&va));
2625   PetscCall(PetscFree(work));
2626   PetscFunctionReturn(0);
2627 }
2628 
2629 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2630 {
2631   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2632   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2633   PetscScalar       *x,*ptr,*from;
2634   Vec               bb1;
2635   const PetscScalar *b;
2636 
2637   PetscFunctionBegin;
2638   PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2639   PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2640 
2641   if (flag == SOR_APPLY_UPPER) {
2642     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2643     PetscFunctionReturn(0);
2644   }
2645 
2646   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2647     if (flag & SOR_ZERO_INITIAL_GUESS) {
2648       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
2649       its--;
2650     }
2651 
2652     PetscCall(VecDuplicate(bb,&bb1));
2653     while (its--) {
2654 
2655       /* lower triangular part: slvec0b = - B^T*xx */
2656       PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2657 
2658       /* copy xx into slvec0a */
2659       PetscCall(VecGetArray(mat->slvec0,&ptr));
2660       PetscCall(VecGetArray(xx,&x));
2661       PetscCall(PetscArraycpy(ptr,x,bs*mbs));
2662       PetscCall(VecRestoreArray(mat->slvec0,&ptr));
2663 
2664       PetscCall(VecScale(mat->slvec0,-1.0));
2665 
2666       /* copy bb into slvec1a */
2667       PetscCall(VecGetArray(mat->slvec1,&ptr));
2668       PetscCall(VecGetArrayRead(bb,&b));
2669       PetscCall(PetscArraycpy(ptr,b,bs*mbs));
2670       PetscCall(VecRestoreArray(mat->slvec1,&ptr));
2671 
2672       /* set slvec1b = 0 */
2673       PetscCall(VecSet(mat->slvec1b,0.0));
2674 
2675       PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2676       PetscCall(VecRestoreArray(xx,&x));
2677       PetscCall(VecRestoreArrayRead(bb,&b));
2678       PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2679 
2680       /* upper triangular (off-diagonal) part: bb1 = bb - B*x */
2681       PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));
2682 
2683       /* local diagonal sweep */
2684       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
2685     }
2686     PetscCall(VecDestroy(&bb1));
2687   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2688     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2689   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2690     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2691   } else if (flag & SOR_EISENSTAT) {
2692     Vec               xx1;
2693     PetscBool         hasop;
2694     const PetscScalar *diag;
2695     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2696     PetscInt          i,n;
2697 
2698     if (!mat->xx1) {
2699       PetscCall(VecDuplicate(bb,&mat->xx1));
2700       PetscCall(VecDuplicate(bb,&mat->bb1));
2701     }
2702     xx1 = mat->xx1;
2703     bb1 = mat->bb1;
2704 
2705     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
2706 
2707     if (!mat->diag) {
2708       /* this is wrong for same matrix with new nonzero values */
2709       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
2710       PetscCall(MatGetDiagonal(matin,mat->diag));
2711     }
2712     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
2713 
2714     if (hasop) {
2715       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
2716       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2717     } else {
2718       /*
2719           These two lines are replaced by code that may be a bit faster for a good compiler
2720       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2721       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2722       */
2723       PetscCall(VecGetArray(mat->slvec1a,&sl));
2724       PetscCall(VecGetArrayRead(mat->diag,&diag));
2725       PetscCall(VecGetArrayRead(bb,&b));
2726       PetscCall(VecGetArray(xx,&x));
2727       PetscCall(VecGetLocalSize(xx,&n));
2728       if (omega == 1.0) {
2729         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2730         PetscCall(PetscLogFlops(2.0*n));
2731       } else {
2732         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2733         PetscCall(PetscLogFlops(3.0*n));
2734       }
2735       PetscCall(VecRestoreArray(mat->slvec1a,&sl));
2736       PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2737       PetscCall(VecRestoreArrayRead(bb,&b));
2738       PetscCall(VecRestoreArray(xx,&x));
2739     }
2740 
2741     /* multiply off-diagonal portion of matrix */
2742     PetscCall(VecSet(mat->slvec1b,0.0));
2743     PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2744     PetscCall(VecGetArray(mat->slvec0,&from));
2745     PetscCall(VecGetArray(xx,&x));
2746     PetscCall(PetscArraycpy(from,x,bs*mbs));
2747     PetscCall(VecRestoreArray(mat->slvec0,&from));
2748     PetscCall(VecRestoreArray(xx,&x));
2749     PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2750     PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2751     PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2752 
2753     /* local sweep */
2754     PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2755     PetscCall(VecAXPY(xx,1.0,xx1));
2756   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 /*@
2761      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2762          rows in standard CSR format.
2763 
2764    Collective
2765 
2766    Input Parameters:
2767 +  comm - MPI communicator
2768 .  bs - the block size, only a block size of 1 is supported
2769 .  m - number of local rows (Cannot be PETSC_DECIDE)
2770 .  n - This value should be the same as the local size used in creating the
2771        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2772        it calculated if N is given). For square matrices n is almost always m.
2773 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2774 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2775 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
2776 .   j - column indices
2777 -   a - matrix values
2778 
2779    Output Parameter:
2780 .   mat - the matrix
2781 
2782    Level: intermediate
2783 
2784    Notes:
2785        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2786      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2787      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2788 
2789        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
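
       Example usage (a minimal, illustrative sketch with bs = 1 and a single MPI rank; error checking omitted):
     the 3x3 matrix with upper-triangular nonzeros A(0,0)=1, A(0,2)=2, A(1,1)=3, and A(2,2)=4 can be created with
.vb
     PetscInt    i[] = {0,2,3,4};
     PetscInt    j[] = {0,2,1,2};
     PetscScalar a[] = {1.0,2.0,3.0,4.0};
     Mat         A;

     PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A));
.ve
     Here i[row+1]-i[row] is the number of nonzeros in local row row, and j holds the (sorted) global column indices of those nonzeros.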
2790 
2791 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2792           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2793 @*/
2794 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2795 {
2796   PetscFunctionBegin;
2797   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2798   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2799   PetscCall(MatCreate(comm,mat));
2800   PetscCall(MatSetSizes(*mat,m,n,M,N));
2801   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2802   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 /*@C
2807    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2808 
2809    Collective
2810 
2811    Input Parameters:
2812 +  B - the matrix
2813 .  bs - the block size
2814 .  i - the indices into j for the start of each local row (starts with zero)
2815 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2816 -  v - optional values in the matrix
2817 
2818    Level: advanced
2819 
2820    Notes:
2821    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2822    and usually the numerical values as well
2823 
2824    Any entries below the diagonal are ignored
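
   As an illustrative sketch (hypothetical layout with bs = 1 and two MPI processes, each owning two rows), the symmetric matrix
.vb
        1  2  0  0
        2  3  4  0
        0  4  5  6
        0  0  6  7
.ve
   is described on the first process (rows 0 and 1) by
.vb
     i[] = {0,2,4};   j[] = {0,1,1,2};   v[] = {1,2,3,4};
.ve
   and on the second process (rows 2 and 3) by
.vb
     i[] = {0,2,3};   j[] = {2,3,3};     v[] = {5,6,7};
.ve
   Here j holds global (block) column indices, and only the upper triangle, including the diagonal, is supplied.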
2825 
2826 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2827 @*/
2828 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2829 {
2830   PetscFunctionBegin;
2831   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2836 {
2837   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2838   PetscInt       *indx;
2839   PetscScalar    *values;
2840 
2841   PetscFunctionBegin;
2842   PetscCall(MatGetSize(inmat,&m,&N));
2843   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2844     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2845     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2846     PetscInt       *bindx,rmax=a->rmax,j;
2847     PetscMPIInt    rank,size;
2848 
2849     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2850     mbs = m/bs; Nbs = N/cbs;
2851     if (n == PETSC_DECIDE) {
2852       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2853     }
2854     nbs = n/cbs;
2855 
2856     PetscCall(PetscMalloc1(rmax,&bindx));
2857     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2858 
2859     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2860     PetscCallMPI(MPI_Comm_size(comm,&size));
2861     if (rank == size-1) {
2862       /* Check sum(nbs) = Nbs */
2863       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2864     }
2865 
2866     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2867     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2868     for (i=0; i<mbs; i++) {
2869       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2870       nnz  = nnz/bs;
2871       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2872       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2873       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2874     }
2875     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2876     PetscCall(PetscFree(bindx));
2877 
2878     PetscCall(MatCreate(comm,outmat));
2879     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2880     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2881     PetscCall(MatSetType(*outmat,MATSBAIJ));
2882     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2883     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2884     MatPreallocateEnd(dnz,onz);
2885   }
2886 
2887   /* numeric phase */
2888   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2889   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2890 
2891   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2892   for (i=0; i<m; i++) {
2893     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2894     Ii   = i + rstart;
2895     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2896     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2897   }
2898   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2899   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2900   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2901   PetscFunctionReturn(0);
2902 }
2903