xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fdf6c4e30aafdbc795e4f76379caa977fd5cdf5a)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
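/*
   Preallocates B from the union of the nonzero patterns of X[0],...,X[nm-1] (all of which must
   share B's row layout): each row of every X[i] is inserted into a temporary MATPREALLOCATOR
   matrix, and also transposed when symm[i] is true, and the resulting pattern is transferred to
   B with MatPreallocatorPreallocate() (fill controls whether explicit zeros are also inserted).
*/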
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
24   PetscValidLogicalCollectiveInt(B,nm,2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
27     PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
28     PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B,fill,5);
31   PetscCall(MatGetBlockSize(B,&bs));
32   PetscCall(MatGetSize(B,&M,&N));
33   PetscCall(MatGetLocalSize(B,&m,&n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
35   PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator,bs));
37   PetscCall(MatSetSizes(preallocator,m,n,M,N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt          ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
47       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
48       if (symm && symm[i]) {
49         PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
50       }
51       PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
52     }
53   }
54   PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
55   PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
56   PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
57   PetscCall(MatDestroy(&preallocator));
58   PetscFunctionReturn(0);
59 }
60 
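/*
   Generic (fallback) conversion out of the MPISBAIJ format: a matrix of type newtype is built
   (or reused and zeroed) with both triangles stored explicitly, by reading the upper-triangular
   rows of A and mirroring each entry across the diagonal, conjugating it when A is marked
   Hermitian and complex scalars are in use.
*/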
61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
62 {
63   Mat            B;
64   PetscInt       r;
65 
66   PetscFunctionBegin;
67   if (reuse != MAT_REUSE_MATRIX) {
68     PetscBool symm = PETSC_TRUE,isdense;
69     PetscInt  bs;
70 
71     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
72     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
73     PetscCall(MatSetType(B,newtype));
74     PetscCall(MatGetBlockSize(A,&bs));
75     PetscCall(MatSetBlockSize(B,bs));
76     PetscCall(PetscLayoutSetUp(B->rmap));
77     PetscCall(PetscLayoutSetUp(B->cmap));
78     PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
79     if (!isdense) {
80       PetscCall(MatGetRowUpperTriangular(A));
81       PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
82       PetscCall(MatRestoreRowUpperTriangular(A));
83     } else {
84       PetscCall(MatSetUp(B));
85     }
86   } else {
87     B    = *newmat;
88     PetscCall(MatZeroEntries(B));
89   }
90 
91   PetscCall(MatGetRowUpperTriangular(A));
92   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
93     PetscInt          ncols;
94     const PetscInt    *row;
95     const PetscScalar *vals;
96 
97     PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
98     PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
99 #if defined(PETSC_USE_COMPLEX)
100     if (A->hermitian == PETSC_BOOL3_TRUE) {
101       PetscInt i;
102       for (i = 0; i < ncols; i++) {
103         PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
104       }
105     } else {
106       PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
107     }
108 #else
109     PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
110 #endif
111     PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
112   }
113   PetscCall(MatRestoreRowUpperTriangular(A));
114   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
116 
117   if (reuse == MAT_INPLACE_MATRIX) {
118     PetscCall(MatHeaderReplace(A,&B));
119   } else {
120     *newmat = B;
121   }
122   PetscFunctionReturn(0);
123 }
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
144 
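/*
   The two macros below insert or add a single scalar at local point indices (row,col) of the
   diagonal (A, SeqSBAIJ) or off-diagonal (B, SeqBAIJ) local matrix: the target block column is
   located by a short binary search plus linear scan within the block row; if the block is absent
   and new nonzeros are allowed, the row is reallocated and shifted via MatSeqXAIJReallocateAIJ().
   orow/ocol are the global indices, used only in the error messages.
*/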
145 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
146   { \
147     brow = row/bs;  \
148     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149     rmax = aimax[brow]; nrow = ailen[brow]; \
150     bcol = col/bs; \
151     ridx = row % bs; cidx = col % bs; \
152     low  = 0; high = nrow; \
153     while (high-low > 3) { \
154       t = (low+high)/2; \
155       if (rp[t] > bcol) high = t; \
156       else              low  = t; \
157     } \
158     for (_i=low; _i<high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2*_i + bs*cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value;  \
163         else                    *bap  = value;  \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
170     N = nrow++ - 1;  \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
173     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
174     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
175     rp[_i]                      = bcol;  \
176     ap[bs2*_i + bs*cidx + ridx] = value;  \
177     A->nonzerostate++;\
178 a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
183   { \
184     brow = row/bs;  \
185     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
186     rmax = bimax[brow]; nrow = bilen[brow]; \
187     bcol = col/bs; \
188     ridx = row % bs; cidx = col % bs; \
189     low  = 0; high = nrow; \
190     while (high-low > 3) { \
191       t = (low+high)/2; \
192       if (rp[t] > bcol) high = t; \
193       else              low  = t; \
194     } \
195     for (_i=low; _i<high; _i++) { \
196       if (rp[_i] > bcol) break; \
197       if (rp[_i] == bcol) { \
198         bap = ap + bs2*_i + bs*cidx + ridx; \
199         if (addv == ADD_VALUES) *bap += value;  \
200         else                    *bap  = value;  \
201         goto b_noinsert; \
202       } \
203     } \
204     if (b->nonew == 1) goto b_noinsert; \
205     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
206     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
207     N = nrow++ - 1;  \
208     /* shift up all the later entries in this row */ \
209     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
210     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
211     PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
212     rp[_i]                      = bcol;  \
213     ap[bs2*_i + bs*cidx + ridx] = value;  \
214     B->nonzerostate++;\
215 b_noinsert:; \
216     bilen[brow] = nrow; \
217   }
218 
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j input by the user is ignored or generates an error
221 */
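/*
   Usage sketch (illustrative, not part of this file): because only blocks with block row <=
   block column are stored, a caller that may generate entries below the diagonal can ask the
   matrix to drop them instead of erroring:

       PetscCall(MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
       PetscCall(MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES));

   With the option set, any value whose block row exceeds its block column is silently ignored;
   without it, such an insertion generates the error below.
*/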
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some Variables required in the macro */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) {
285           continue;
286         } else {
287           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
288           /* off-diag entry (B) */
289           if (mat->was_assembled) {
290             if (!baij->colmap) {
291               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
292             }
293 #if defined(PETSC_USE_CTABLE)
294             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
295             col  = col - 1;
296 #else
297             col = baij->colmap[in[j]/bs] - 1;
298 #endif
299             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
300               PetscCall(MatDisAssemble_MPISBAIJ(mat));
301               col  =  in[j];
302               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
303               B    = baij->B;
304               b    = (Mat_SeqBAIJ*)(B)->data;
305               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
306               ba   = b->a;
307             } else col += in[j]%bs;
308           } else col = in[j];
309           if (roworiented) value = v[i*n+j];
310           else             value = v[i+j*m];
311           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
312           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
313         }
314       }
315     } else {  /* off processor entry */
316       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
317       if (!baij->donotstash) {
318         mat->assembled = PETSC_FALSE;
319         n_loc          = 0;
320         for (j=0; j<n; j++) {
321           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
322           in_loc[n_loc] = in[j];
323           if (roworiented) {
324             v_loc[n_loc] = v[i*n+j];
325           } else {
326             v_loc[n_loc] = v[j*m+i];
327           }
328           n_loc++;
329         }
330         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
331       }
332     }
333   }
334   PetscFunctionReturn(0);
335 }
336 
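/*
   Insert or add one bs x bs block at block position (row,col) of the local SeqSBAIJ (diagonal)
   matrix; v holds the dense block, laid out according to a->roworiented. Blocks below the
   diagonal (col < row) are ignored or rejected, as in the point-wise routine above.
*/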
337 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
338 {
339   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
340   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
341   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
342   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
343   PetscBool         roworiented=a->roworiented;
344   const PetscScalar *value     = v;
345   MatScalar         *ap,*aa = a->a,*bap;
346 
347   PetscFunctionBegin;
348   if (col < row) {
349     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
350     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
351   }
352   rp   = aj + ai[row];
353   ap   = aa + bs2*ai[row];
354   rmax = imax[row];
355   nrow = ailen[row];
356   value = v;
357   low   = 0;
358   high  = nrow;
359 
360   while (high-low > 7) {
361     t = (low+high)/2;
362     if (rp[t] > col) high = t;
363     else             low  = t;
364   }
365   for (i=low; i<high; i++) {
366     if (rp[i] > col) break;
367     if (rp[i] == col) {
368       bap = ap +  bs2*i;
369       if (roworiented) {
370         if (is == ADD_VALUES) {
371           for (ii=0; ii<bs; ii++) {
372             for (jj=ii; jj<bs2; jj+=bs) {
373               bap[jj] += *value++;
374             }
375           }
376         } else {
377           for (ii=0; ii<bs; ii++) {
378             for (jj=ii; jj<bs2; jj+=bs) {
379               bap[jj] = *value++;
380             }
381           }
382         }
383       } else {
384         if (is == ADD_VALUES) {
385           for (ii=0; ii<bs; ii++) {
386             for (jj=0; jj<bs; jj++) {
387               *bap++ += *value++;
388             }
389           }
390         } else {
391           for (ii=0; ii<bs; ii++) {
392             for (jj=0; jj<bs; jj++) {
393               *bap++  = *value++;
394             }
395           }
396         }
397       }
398       goto noinsert2;
399     }
400   }
401   if (nonew == 1) goto noinsert2;
402   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
403   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
404   N = nrow++ - 1; high++;
405   /* shift up all the later entries in this row */
406   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
407   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
408   rp[i] = col;
409   bap   = ap +  bs2*i;
410   if (roworiented) {
411     for (ii=0; ii<bs; ii++) {
412       for (jj=ii; jj<bs2; jj+=bs) {
413         bap[jj] = *value++;
414       }
415     }
416   } else {
417     for (ii=0; ii<bs; ii++) {
418       for (jj=0; jj<bs; jj++) {
419         *bap++ = *value++;
420       }
421     }
422   }
423   noinsert2:;
424   ailen[row] = nrow;
425   PetscFunctionReturn(0);
426 }
427 
428 /*
429    This routine is exactly duplicated in mpibaij.c
430 */
431 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
432 {
433   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
434   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
435   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
436   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
437   PetscBool         roworiented=a->roworiented;
438   const PetscScalar *value     = v;
439   MatScalar         *ap,*aa = a->a,*bap;
440 
441   PetscFunctionBegin;
442   rp   = aj + ai[row];
443   ap   = aa + bs2*ai[row];
444   rmax = imax[row];
445   nrow = ailen[row];
446   low  = 0;
447   high = nrow;
448   value = v;
449   while (high-low > 7) {
450     t = (low+high)/2;
451     if (rp[t] > col) high = t;
452     else             low  = t;
453   }
454   for (i=low; i<high; i++) {
455     if (rp[i] > col) break;
456     if (rp[i] == col) {
457       bap = ap +  bs2*i;
458       if (roworiented) {
459         if (is == ADD_VALUES) {
460           for (ii=0; ii<bs; ii++) {
461             for (jj=ii; jj<bs2; jj+=bs) {
462               bap[jj] += *value++;
463             }
464           }
465         } else {
466           for (ii=0; ii<bs; ii++) {
467             for (jj=ii; jj<bs2; jj+=bs) {
468               bap[jj] = *value++;
469             }
470           }
471         }
472       } else {
473         if (is == ADD_VALUES) {
474           for (ii=0; ii<bs; ii++,value+=bs) {
475             for (jj=0; jj<bs; jj++) {
476               bap[jj] += value[jj];
477             }
478             bap += bs;
479           }
480         } else {
481           for (ii=0; ii<bs; ii++,value+=bs) {
482             for (jj=0; jj<bs; jj++) {
483               bap[jj]  = value[jj];
484             }
485             bap += bs;
486           }
487         }
488       }
489       goto noinsert2;
490     }
491   }
492   if (nonew == 1) goto noinsert2;
493   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
494   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
495   N = nrow++ - 1; high++;
496   /* shift up all the later entries in this row */
497   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
498   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
499   rp[i] = col;
500   bap   = ap +  bs2*i;
501   if (roworiented) {
502     for (ii=0; ii<bs; ii++) {
503       for (jj=ii; jj<bs2; jj+=bs) {
504         bap[jj] = *value++;
505       }
506     }
507   } else {
508     for (ii=0; ii<bs; ii++) {
509       for (jj=0; jj<bs; jj++) {
510         *bap++ = *value++;
511       }
512     }
513   }
514   noinsert2:;
515   ailen[row] = nrow;
516   PetscFunctionReturn(0);
517 }
518 
519 /*
520     This routine could be optimized by removing the need for the block copy below and passing stride information
521   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
522 */
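/*
   Blocked insertion for the parallel matrix: each bs x bs block of v is, when necessary, copied
   contiguously into baij->barray and then routed to the diagonal part (baij->A), the off-diagonal
   part (baij->B), or the block stash for rows owned by other processes.
*/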
523 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
524 {
525   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
526   const MatScalar *value;
527   MatScalar       *barray     =baij->barray;
528   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
529   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
530   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
531   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
532 
533   PetscFunctionBegin;
534   if (!barray) {
535     PetscCall(PetscMalloc1(bs2,&barray));
536     baij->barray = barray;
537   }
538 
539   if (roworiented) {
540     stepval = (n-1)*bs;
541   } else {
542     stepval = (m-1)*bs;
543   }
544   for (i=0; i<m; i++) {
545     if (im[i] < 0) continue;
546     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
547     if (im[i] >= rstart && im[i] < rend) {
548       row = im[i] - rstart;
549       for (j=0; j<n; j++) {
550         if (im[i] > in[j]) {
551           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
552           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
553         }
554         /* If only one block column (row-oriented) or one block row (column-oriented) is given, v already points at a contiguous bs2 block and no copy is required */
555         if ((roworiented) && (n == 1)) {
556           barray = (MatScalar*) v + i*bs2;
557         } else if ((!roworiented) && (m == 1)) {
558           barray = (MatScalar*) v + j*bs2;
559         } else { /* Here a copy is required */
560           if (roworiented) {
561             value = v + i*(stepval+bs)*bs + j*bs;
562           } else {
563             value = v + j*(stepval+bs)*bs + i*bs;
564           }
565           for (ii=0; ii<bs; ii++,value+=stepval) {
566             for (jj=0; jj<bs; jj++) {
567               *barray++ = *value++;
568             }
569           }
570           barray -=bs2;
571         }
572 
573         if (in[j] >= cstart && in[j] < cend) {
574           col  = in[j] - cstart;
575           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
576         } else if (in[j] < 0) {
577           continue;
578         } else {
579           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
580           if (mat->was_assembled) {
581             if (!baij->colmap) {
582               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
583             }
584 
585 #if defined(PETSC_USE_DEBUG)
586 #if defined(PETSC_USE_CTABLE)
587             { PetscInt data;
588               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
589               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590             }
591 #else
592             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
593 #endif
594 #endif
595 #if defined(PETSC_USE_CTABLE)
596             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
597             col  = (col - 1)/bs;
598 #else
599             col = (baij->colmap[in[j]] - 1)/bs;
600 #endif
601             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
602               PetscCall(MatDisAssemble_MPISBAIJ(mat));
603               col  = in[j];
604             }
605           } else col = in[j];
606           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
607         }
608       }
609     } else {
610       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
611       if (!baij->donotstash) {
612         if (roworiented) {
613           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
614         } else {
615           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
616         }
617       }
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
624 {
625   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
626   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
627   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
628 
629   PetscFunctionBegin;
630   for (i=0; i<m; i++) {
631     if (idxm[i] < 0) continue; /* negative row */
632     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
633     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
634       row = idxm[i] - bsrstart;
635       for (j=0; j<n; j++) {
636         if (idxn[j] < 0) continue; /* negative column */
637         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
638         if (idxn[j] >= bscstart && idxn[j] < bscend) {
639           col  = idxn[j] - bscstart;
640           PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
641         } else {
642           if (!baij->colmap) {
643             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
644           }
645 #if defined(PETSC_USE_CTABLE)
646           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
647           data--;
648 #else
649           data = baij->colmap[idxn[j]/bs]-1;
650 #endif
651           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
652           else {
653             col  = data + idxn[j]%bs;
654             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
655           }
656         }
657       }
658     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
664 {
665   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
666   PetscReal      sum[2],*lnorm2;
667 
668   PetscFunctionBegin;
669   if (baij->size == 1) {
670     PetscCall(MatNorm(baij->A,type,norm));
671   } else {
672     if (type == NORM_FROBENIUS) {
673       PetscCall(PetscMalloc1(2,&lnorm2));
674       PetscCall(MatNorm(baij->A,type,lnorm2));
675       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
676       PetscCall(MatNorm(baij->B,type,lnorm2));
677       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
678       PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
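      /* entries of B are stored once but appear twice (upper and lower triangle) in the full symmetric matrix, hence the factor 2 below */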
679       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
680       PetscCall(PetscFree(lnorm2));
681     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
682       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
683       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
684       PetscReal    *rsum,*rsum2,vabs;
685       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
686       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
687       MatScalar    *v;
688 
689       PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
690       PetscCall(PetscArrayzero(rsum,mat->cmap->N));
691       /* Amat */
692       v = amat->a; jj = amat->j;
693       for (brow=0; brow<mbs; brow++) {
694         grow = bs*(rstart + brow);
695         nz   = amat->i[brow+1] - amat->i[brow];
696         for (bcol=0; bcol<nz; bcol++) {
697           gcol = bs*(rstart + *jj); jj++;
698           for (col=0; col<bs; col++) {
699             for (row=0; row<bs; row++) {
700               vabs            = PetscAbsScalar(*v); v++;
701               rsum[gcol+col] += vabs;
702               /* non-diagonal block */
703               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
704             }
705           }
706         }
707         PetscCall(PetscLogFlops(nz*bs*bs));
708       }
709       /* Bmat */
710       v = bmat->a; jj = bmat->j;
711       for (brow=0; brow<mbs; brow++) {
712         grow = bs*(rstart + brow);
713         nz = bmat->i[brow+1] - bmat->i[brow];
714         for (bcol=0; bcol<nz; bcol++) {
715           gcol = bs*garray[*jj]; jj++;
716           for (col=0; col<bs; col++) {
717             for (row=0; row<bs; row++) {
718               vabs            = PetscAbsScalar(*v); v++;
719               rsum[gcol+col] += vabs;
720               rsum[grow+row] += vabs;
721             }
722           }
723         }
724         PetscCall(PetscLogFlops(nz*bs*bs));
725       }
726       PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
727       *norm = 0.0;
728       for (col=0; col<mat->cmap->N; col++) {
729         if (rsum2[col] > *norm) *norm = rsum2[col];
730       }
731       PetscCall(PetscFree2(rsum,rsum2));
732     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
733   }
734   PetscFunctionReturn(0);
735 }
736 
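/*
   Off-process values deposited by MatSetValues_MPISBAIJ()/MatSetValuesBlocked_MPISBAIJ() sit in
   mat->stash and mat->bstash; MatAssemblyBegin starts communicating them to their owning
   processes, and MatAssemblyEnd below receives and inserts them before assembling A and B.
*/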
737 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
738 {
739   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
740   PetscInt       nstash,reallocs;
741 
742   PetscFunctionBegin;
743   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
744 
745   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
746   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
747   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
748   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
749   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
750   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
751   PetscFunctionReturn(0);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
755 {
756   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
757   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
758   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
759   PetscInt       *row,*col;
760   PetscBool      other_disassembled;
761   PetscMPIInt    n;
762   PetscBool      r1,r2,r3;
763   MatScalar      *val;
764 
765   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
766   PetscFunctionBegin;
767   if (!baij->donotstash &&  !mat->nooffprocentries) {
768     while (1) {
769       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
770       if (!flg) break;
771 
772       for (i=0; i<n;) {
773         /* Now identify the consecutive vals belonging to the same row */
774         for (j=i,rstart=row[j]; j<n; j++) {
775           if (row[j] != rstart) break;
776         }
777         if (j < n) ncols = j-i;
778         else       ncols = n-i;
779         /* Now assemble all these values with a single function call */
780         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
781         i    = j;
782       }
783     }
784     PetscCall(MatStashScatterEnd_Private(&mat->stash));
785     /* Now process the block-stash. Since the values are stashed column-oriented,
786        temporarily clear the roworiented flags, and restore the original flags
787        after the MatSetValuesBlocked() calls below */
788     r1 = baij->roworiented;
789     r2 = a->roworiented;
790     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
791 
792     baij->roworiented = PETSC_FALSE;
793     a->roworiented    = PETSC_FALSE;
794 
795     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
796     while (1) {
797       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
798       if (!flg) break;
799 
800       for (i=0; i<n;) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
808         i    = j;
809       }
810     }
811     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
812 
813     baij->roworiented = r1;
814     a->roworiented    = r2;
815 
816     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
817   }
818 
819   PetscCall(MatAssemblyBegin(baij->A,mode));
820   PetscCall(MatAssemblyEnd(baij->A,mode));
821 
822   /* determine if any processor has disassembled, if so we must
823      also disassemble ourselves, in order that we may reassemble. */
824   /*
825      if nonzero structure of submatrix B cannot change then we know that
826      no processor disassembled thus we can skip this stuff
827   */
828   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
829     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
830     if (mat->was_assembled && !other_disassembled) {
831       PetscCall(MatDisAssemble_MPISBAIJ(mat));
832     }
833   }
834 
835   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
836     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
837   }
838   PetscCall(MatAssemblyBegin(baij->B,mode));
839   PetscCall(MatAssemblyEnd(baij->B,mode));
840 
841   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
842 
843   baij->rowvalues = NULL;
844 
845   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
846   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
847     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
848     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
849   }
850   PetscFunctionReturn(0);
851 }
852 
853 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
854 #include <petscdraw.h>
855 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
856 {
857   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
858   PetscInt          bs   = mat->rmap->bs;
859   PetscMPIInt       rank = baij->rank;
860   PetscBool         iascii,isdraw;
861   PetscViewer       sviewer;
862   PetscViewerFormat format;
863 
864   PetscFunctionBegin;
865   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
866   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
867   if (iascii) {
868     PetscCall(PetscViewerGetFormat(viewer,&format));
869     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
870       MatInfo info;
871       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
872       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
873       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
874       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
875       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
876       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
877       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
878       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
879       PetscCall(PetscViewerFlush(viewer));
880       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
881       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
882       PetscCall(VecScatterView(baij->Mvctx,viewer));
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_INFO) {
885       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
886       PetscFunctionReturn(0);
887     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
888       PetscFunctionReturn(0);
889     }
890   }
891 
892   if (isdraw) {
893     PetscDraw draw;
894     PetscBool isnull;
895     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
896     PetscCall(PetscDrawIsNull(draw,&isnull));
897     if (isnull) PetscFunctionReturn(0);
898   }
899 
900   {
901     /* assemble the entire matrix onto the first processor */
902     Mat          A;
903     Mat_SeqSBAIJ *Aloc;
904     Mat_SeqBAIJ  *Bloc;
905     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
906     MatScalar    *a;
907     const char   *matname;
908 
909     /* Should this be the same type as mat? */
910     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
911     if (rank == 0) {
912       PetscCall(MatSetSizes(A,M,N,M,N));
913     } else {
914       PetscCall(MatSetSizes(A,0,0,M,N));
915     }
916     PetscCall(MatSetType(A,MATMPISBAIJ));
917     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
918     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
919     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
923     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     PetscCall(PetscMalloc1(bs,&rvals));
925 
926     for (i=0; i<mbs; i++) {
927       rvals[0] = bs*(baij->rstartbs + i);
928       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
929       for (j=ai[i]; j<ai[i+1]; j++) {
930         col = (baij->cstartbs+aj[j])*bs;
931         for (k=0; k<bs; k++) {
932           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
933           col++;
934           a += bs;
935         }
936       }
937     }
938     /* copy over the B part */
939     Bloc = (Mat_SeqBAIJ*)baij->B->data;
940     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
941     for (i=0; i<mbs; i++) {
942 
943       rvals[0] = bs*(baij->rstartbs + i);
944       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
945       for (j=ai[i]; j<ai[i+1]; j++) {
946         col = baij->garray[aj[j]]*bs;
947         for (k=0; k<bs; k++) {
948           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
949           col++;
950           a += bs;
951         }
952       }
953     }
954     PetscCall(PetscFree(rvals));
955     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
956     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
957     /*
958        Everyone has to participate in drawing the matrix since the graphics waits are
959        synchronized across all processors that share the PetscDraw object
960     */
961     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
962     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
963     if (rank == 0) {
964       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
965       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
966     }
967     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
968     PetscCall(PetscViewerFlush(viewer));
969     PetscCall(MatDestroy(&A));
970   }
971   PetscFunctionReturn(0);
972 }
973 
974 /* Used for both MPIBAIJ and MPISBAIJ matrices */
975 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
976 
977 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
978 {
979   PetscBool      iascii,isdraw,issocket,isbinary;
980 
981   PetscFunctionBegin;
982   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
983   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
984   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
985   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
986   if (iascii || isdraw || issocket) {
987     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
988   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
993 {
994   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
995 
996   PetscFunctionBegin;
997 #if defined(PETSC_USE_LOG)
998   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
999 #endif
1000   PetscCall(MatStashDestroy_Private(&mat->stash));
1001   PetscCall(MatStashDestroy_Private(&mat->bstash));
1002   PetscCall(MatDestroy(&baij->A));
1003   PetscCall(MatDestroy(&baij->B));
1004 #if defined(PETSC_USE_CTABLE)
1005   PetscCall(PetscTableDestroy(&baij->colmap));
1006 #else
1007   PetscCall(PetscFree(baij->colmap));
1008 #endif
1009   PetscCall(PetscFree(baij->garray));
1010   PetscCall(VecDestroy(&baij->lvec));
1011   PetscCall(VecScatterDestroy(&baij->Mvctx));
1012   PetscCall(VecDestroy(&baij->slvec0));
1013   PetscCall(VecDestroy(&baij->slvec0b));
1014   PetscCall(VecDestroy(&baij->slvec1));
1015   PetscCall(VecDestroy(&baij->slvec1a));
1016   PetscCall(VecDestroy(&baij->slvec1b));
1017   PetscCall(VecScatterDestroy(&baij->sMvctx));
1018   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1019   PetscCall(PetscFree(baij->barray));
1020   PetscCall(PetscFree(baij->hd));
1021   PetscCall(VecDestroy(&baij->diag));
1022   PetscCall(VecDestroy(&baij->bb1));
1023   PetscCall(VecDestroy(&baij->xx1));
1024 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1025   PetscCall(PetscFree(baij->setvaluescopy));
1026 #endif
1027   PetscCall(PetscFree(baij->in_loc));
1028   PetscCall(PetscFree(baij->v_loc));
1029   PetscCall(PetscFree(baij->rangebs));
1030   PetscCall(PetscFree(mat->data));
1031 
1032   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1034   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1035   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1036   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1037 #if defined(PETSC_HAVE_ELEMENTAL)
1038   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1039 #endif
1040 #if defined(PETSC_HAVE_SCALAPACK)
1041   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1042 #endif
1043   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1044   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1045   PetscFunctionReturn(0);
1046 }
1047 
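/*
   Matrix-vector products for the symmetric storage: only the upper-triangular blocks are kept
   locally, so y = A x is accumulated as
      slvec1a = A_diag x                  (diagonal block, SeqSBAIJ multiply)
      slvec0b = B^H x   (or B^T x)        (stored off-diagonal blocks acting on remote rows)
   then slvec0 is scatter-added into slvec1 across processes and the remaining term B slvec1b is
   added. The split work vectors slvec0 = [x ; slvec0b] and slvec1 = [slvec1a ; slvec1b] are
   assumed to be set up by MatSetUpMultiply_MPISBAIJ().
*/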
1048 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1049 {
1050   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1051   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1052   PetscScalar       *from;
1053   const PetscScalar *x;
1054 
1055   PetscFunctionBegin;
1056   /* diagonal part */
1057   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1058   PetscCall(VecSet(a->slvec1b,0.0));
1059 
1060   /* subdiagonal part */
1061   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1062   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1063 
1064   /* copy x into the vec slvec0 */
1065   PetscCall(VecGetArray(a->slvec0,&from));
1066   PetscCall(VecGetArrayRead(xx,&x));
1067 
1068   PetscCall(PetscArraycpy(from,x,bs*mbs));
1069   PetscCall(VecRestoreArray(a->slvec0,&from));
1070   PetscCall(VecRestoreArrayRead(xx,&x));
1071 
1072   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1073   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1074   /* superdiagonal part */
1075   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1082   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1083   PetscScalar       *from;
1084   const PetscScalar *x;
1085 
1086   PetscFunctionBegin;
1087   /* diagonal part */
1088   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1089   PetscCall(VecSet(a->slvec1b,0.0));
1090 
1091   /* subdiagonal part */
1092   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1093 
1094   /* copy x into the vec slvec0 */
1095   PetscCall(VecGetArray(a->slvec0,&from));
1096   PetscCall(VecGetArrayRead(xx,&x));
1097 
1098   PetscCall(PetscArraycpy(from,x,bs*mbs));
1099   PetscCall(VecRestoreArray(a->slvec0,&from));
1100   PetscCall(VecRestoreArrayRead(xx,&x));
1101 
1102   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1103   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1104   /* superdiagonal part */
1105   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1112   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1113   PetscScalar       *from,zero=0.0;
1114   const PetscScalar *x;
1115 
1116   PetscFunctionBegin;
1117   /* diagonal part */
1118   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1119   PetscCall(VecSet(a->slvec1b,zero));
1120 
1121   /* subdiagonal part */
1122   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1123   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1124 
1125   /* copy x into the vec slvec0 */
1126   PetscCall(VecGetArray(a->slvec0,&from));
1127   PetscCall(VecGetArrayRead(xx,&x));
1128   PetscCall(PetscArraycpy(from,x,bs*mbs));
1129   PetscCall(VecRestoreArray(a->slvec0,&from));
1130 
1131   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1132   PetscCall(VecRestoreArrayRead(xx,&x));
1133   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1134 
1135   /* superdiagonal part */
1136   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141 {
1142   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1143   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1144   PetscScalar       *from,zero=0.0;
1145   const PetscScalar *x;
1146 
1147   PetscFunctionBegin;
1148   /* diagonal part */
1149   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1150   PetscCall(VecSet(a->slvec1b,zero));
1151 
1152   /* subdiagonal part */
1153   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1154 
1155   /* copy x into the vec slvec0 */
1156   PetscCall(VecGetArray(a->slvec0,&from));
1157   PetscCall(VecGetArrayRead(xx,&x));
1158   PetscCall(PetscArraycpy(from,x,bs*mbs));
1159   PetscCall(VecRestoreArray(a->slvec0,&from));
1160 
1161   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1162   PetscCall(VecRestoreArrayRead(xx,&x));
1163   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1164 
1165   /* superdiagonal part */
1166   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 /*
1171   This only works correctly for square matrices where the subblock A->A is the
1172    diagonal block
1173 */
1174 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1175 {
1176   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1177 
1178   PetscFunctionBegin;
1179   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1180   PetscCall(MatGetDiagonal(a->A,v));
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1185 {
1186   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(MatScale(a->A,aa));
1190   PetscCall(MatScale(a->B,aa));
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1195 {
1196   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1197   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1198   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1199   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1200   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1201 
1202   PetscFunctionBegin;
1203   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1204   mat->getrowactive = PETSC_TRUE;
1205 
1206   if (!mat->rowvalues && (idx || v)) {
1207     /*
1208         allocate enough space to hold information from the longest row.
1209     */
1210     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1211     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1212     PetscInt     max = 1,mbs = mat->mbs,tmp;
1213     for (i=0; i<mbs; i++) {
1214       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1215       if (max < tmp) max = tmp;
1216     }
1217     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1218   }
1219 
1220   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1221   lrow = row - brstart;  /* local row index */
1222 
1223   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1224   if (!v)   {pvA = NULL; pvB = NULL;}
1225   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1226   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1227   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1228   nztot = nzA + nzB;
1229 
1230   cmap = mat->garray;
1231   if (v  || idx) {
1232     if (nztot) {
1233       /* Sort by increasing column numbers, assuming A and B already sorted */
1234       PetscInt imark = -1;
1235       if (v) {
1236         *v = v_p = mat->rowvalues;
1237         for (i=0; i<nzB; i++) {
1238           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1239           else break;
1240         }
1241         imark = i;
1242         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1243         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1244       }
1245       if (idx) {
1246         *idx = idx_p = mat->rowindices;
1247         if (imark > -1) {
1248           for (i=0; i<imark; i++) {
1249             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1250           }
1251         } else {
1252           for (i=0; i<nzB; i++) {
1253             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1254             else break;
1255           }
1256           imark = i;
1257         }
1258         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1259         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1260       }
1261     } else {
1262       if (idx) *idx = NULL;
1263       if (v)   *v   = NULL;
1264     }
1265   }
1266   *nz  = nztot;
1267   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1268   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1273 {
1274   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1275 
1276   PetscFunctionBegin;
1277   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1278   baij->getrowactive = PETSC_FALSE;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1283 {
1284   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1285   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1286 
1287   PetscFunctionBegin;
1288   aA->getrow_utriangular = PETSC_TRUE;
1289   PetscFunctionReturn(0);
1290 }
1291 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1292 {
1293   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1294   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1295 
1296   PetscFunctionBegin;
1297   aA->getrow_utriangular = PETSC_FALSE;
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1302 {
1303   PetscFunctionBegin;
1304   if (PetscDefined(USE_COMPLEX)) {
1305     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1306 
1307     PetscCall(MatConjugate(a->A));
1308     PetscCall(MatConjugate(a->B));
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1316 
1317   PetscFunctionBegin;
1318   PetscCall(MatRealPart(a->A));
1319   PetscCall(MatRealPart(a->B));
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1324 {
1325   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1326 
1327   PetscFunctionBegin;
1328   PetscCall(MatImaginaryPart(a->A));
1329   PetscCall(MatImaginaryPart(a->B));
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1334    Input: isrow       - distributed(parallel),
1335           iscol_local - locally owned (seq)
1336 */
1337 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1338 {
1339   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1340   const PetscInt *ptr1,*ptr2;
1341 
1342   PetscFunctionBegin;
1343   PetscCall(ISGetLocalSize(isrow,&sz1));
1344   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1345   if (sz1 > sz2) {
1346     *flg = PETSC_FALSE;
1347     PetscFunctionReturn(0);
1348   }
1349 
1350   PetscCall(ISGetIndices(isrow,&ptr1));
1351   PetscCall(ISGetIndices(iscol_local,&ptr2));
1352 
1353   PetscCall(PetscMalloc1(sz1,&a1));
1354   PetscCall(PetscMalloc1(sz2,&a2));
1355   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1356   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1357   PetscCall(PetscSortInt(sz1,a1));
1358   PetscCall(PetscSortInt(sz2,a2));
1359 
1360   nmatch=0;
1361   k     = 0;
1362   for (i=0; i<sz1; i++) {
1363     for (j=k; j<sz2; j++) {
1364       if (a1[i] == a2[j]) {
1365         k = j; nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow,&ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1383 {
1384   IS             iscol_local;
1385   PetscInt       csize;
1386   PetscBool      isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol,&csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1392     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol,&iscol_local));
1397     PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422 {
1423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1424   Mat            A  = a->A,B = a->B;
1425   PetscLogDouble isend[5],irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1431 
1432   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1433   isend[3] = info->memory;  isend[4] = info->mallocs;
1434 
1435   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1436 
1437   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1438   isend[3] += info->memory;  isend[4] += info->mallocs;
1439   if (flag == MAT_LOCAL) {
1440     info->nz_used      = isend[0];
1441     info->nz_allocated = isend[1];
1442     info->nz_unneeded  = isend[2];
1443     info->memory       = isend[3];
1444     info->mallocs      = isend[4];
1445   } else if (flag == MAT_GLOBAL_MAX) {
1446     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else if (flag == MAT_GLOBAL_SUM) {
1454     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1455 
1456     info->nz_used      = irecv[0];
1457     info->nz_allocated = irecv[1];
1458     info->nz_unneeded  = irecv[2];
1459     info->memory       = irecv[3];
1460     info->mallocs      = irecv[4];
1461   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1462   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463   info->fill_ratio_needed = 0;
1464   info->factor_mallocs    = 0;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1469 {
1470   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1471   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1472 
1473   PetscFunctionBegin;
1474   switch (op) {
1475   case MAT_NEW_NONZERO_LOCATIONS:
1476   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1477   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478   case MAT_KEEP_NONZERO_PATTERN:
1479   case MAT_SUBMAT_SINGLEIS:
1480   case MAT_NEW_NONZERO_LOCATION_ERR:
1481     MatCheckPreallocated(A,1);
1482     PetscCall(MatSetOption(a->A,op,flg));
1483     PetscCall(MatSetOption(a->B,op,flg));
1484     break;
1485   case MAT_ROW_ORIENTED:
1486     MatCheckPreallocated(A,1);
1487     a->roworiented = flg;
1488 
1489     PetscCall(MatSetOption(a->A,op,flg));
1490     PetscCall(MatSetOption(a->B,op,flg));
1491     break;
1492   case MAT_FORCE_DIAGONAL_ENTRIES:
1493   case MAT_SORTED_FULL:
1494     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1495     break;
1496   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497     a->donotstash = flg;
1498     break;
1499   case MAT_USE_HASH_TABLE:
1500     a->ht_flag = flg;
1501     break;
1502   case MAT_HERMITIAN:
1503     MatCheckPreallocated(A,1);
1504     PetscCall(MatSetOption(a->A,op,flg));
1505 #if defined(PETSC_USE_COMPLEX)
1506     if (flg) { /* need different mat-vec ops */
1507       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509       A->ops->multtranspose    = NULL;
1510       A->ops->multtransposeadd = NULL;
1511       A->symmetric = PETSC_BOOL3_FALSE;
1512     }
1513 #endif
1514     break;
1515   case MAT_SPD:
1516   case MAT_SYMMETRIC:
1517     MatCheckPreallocated(A,1);
1518     PetscCall(MatSetOption(a->A,op,flg));
1519 #if defined(PETSC_USE_COMPLEX)
1520     if (flg) { /* restore to use default mat-vec ops */
1521       A->ops->mult             = MatMult_MPISBAIJ;
1522       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525     }
1526 #endif
1527     break;
1528   case MAT_STRUCTURALLY_SYMMETRIC:
1529     MatCheckPreallocated(A,1);
1530     PetscCall(MatSetOption(a->A,op,flg));
1531     break;
1532   case MAT_SYMMETRY_ETERNAL:
1533   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1534     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1535     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1536     break;
1537   case MAT_SPD_ETERNAL:
1538     break;
1539   case MAT_IGNORE_LOWER_TRIANGULAR:
1540     aA->ignore_ltriangular = flg;
1541     break;
1542   case MAT_ERROR_LOWER_TRIANGULAR:
1543     aA->ignore_ltriangular = flg;
1544     break;
1545   case MAT_GETROW_UPPERTRIANGULAR:
1546     aA->getrow_utriangular = flg;
1547     break;
1548   default:
1549     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1555 {
1556   PetscFunctionBegin;
1557   if (reuse == MAT_INITIAL_MATRIX) {
1558     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1559   }  else if (reuse == MAT_REUSE_MATRIX) {
1560     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1561   }
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1566 {
1567   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1568   Mat            a     = baij->A, b=baij->B;
1569   PetscInt       nv,m,n;
1570   PetscBool      flg;
1571 
1572   PetscFunctionBegin;
1573   if (ll != rr) {
1574     PetscCall(VecEqual(ll,rr,&flg));
1575     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
1576   }
1577   if (!ll) PetscFunctionReturn(0);
1578 
1579   PetscCall(MatGetLocalSize(mat,&m,&n));
1580   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n);
1581 
1582   PetscCall(VecGetLocalSize(rr,&nv));
1583   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1584 
1585   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1586 
1587   /* left diagonalscale the off-diagonal part */
1588   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1589 
1590   /* scale the diagonal part */
1591   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1592 
1593   /* right diagonalscale the off-diagonal part */
1594   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1595   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1600 {
1601   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1602 
1603   PetscFunctionBegin;
1604   PetscCall(MatSetUnfactored(a->A));
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1609 
1610 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1611 {
1612   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1613   Mat            a,b,c,d;
1614   PetscBool      flg;
1615 
1616   PetscFunctionBegin;
1617   a = matA->A; b = matA->B;
1618   c = matB->A; d = matB->B;
1619 
1620   PetscCall(MatEqual(a,c,&flg));
1621   if (flg) {
1622     PetscCall(MatEqual(b,d,&flg));
1623   }
1624   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1625   PetscFunctionReturn(0);
1626 }
1627 
1628 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1629 {
1630   PetscBool      isbaij;
1631 
1632   PetscFunctionBegin;
1633   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1634   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1635   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1636   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1637     PetscCall(MatGetRowUpperTriangular(A));
1638     PetscCall(MatCopy_Basic(A,B,str));
1639     PetscCall(MatRestoreRowUpperTriangular(A));
1640   } else {
1641     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1642     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1643 
1644     PetscCall(MatCopy(a->A,b->A,str));
1645     PetscCall(MatCopy(a->B,b->B,str));
1646   }
1647   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1652 {
1653   PetscFunctionBegin;
1654   PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1659 {
1660   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1661   PetscBLASInt   bnz,one=1;
1662   Mat_SeqSBAIJ   *xa,*ya;
1663   Mat_SeqBAIJ    *xb,*yb;
1664 
1665   PetscFunctionBegin;
1666   if (str == SAME_NONZERO_PATTERN) {
1667     PetscScalar alpha = a;
1668     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1669     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1670     PetscCall(PetscBLASIntCast(xa->nz,&bnz));
1671     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1672     xb   = (Mat_SeqBAIJ*)xx->B->data;
1673     yb   = (Mat_SeqBAIJ*)yy->B->data;
1674     PetscCall(PetscBLASIntCast(xb->nz,&bnz));
1675     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1676     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1678     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
1679     PetscCall(MatAXPY_Basic(Y,a,X,str));
1680     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
1681   } else {
1682     Mat      B;
1683     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1684     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1685     PetscCall(MatGetRowUpperTriangular(X));
1686     PetscCall(MatGetRowUpperTriangular(Y));
1687     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1688     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1689     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1690     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1691     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1692     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1693     PetscCall(MatSetType(B,MATMPISBAIJ));
1694     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1695     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1696     PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1697     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1698     PetscCall(MatHeaderMerge(Y,&B));
1699     PetscCall(PetscFree(nnz_d));
1700     PetscCall(PetscFree(nnz_o));
1701     PetscCall(MatRestoreRowUpperTriangular(X));
1702     PetscCall(MatRestoreRowUpperTriangular(Y));
1703   }
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1708 {
1709   PetscInt       i;
1710   PetscBool      flg;
1711 
1712   PetscFunctionBegin;
1713   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1714   for (i=0; i<n; i++) {
1715     PetscCall(ISEqual(irow[i],icol[i],&flg));
1716     if (!flg) {
1717       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1718     }
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1724 {
1725   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1726   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1727 
1728   PetscFunctionBegin;
1729   if (!Y->preallocated) {
1730     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1731   } else if (!aij->nz) {
1732     PetscInt nonew = aij->nonew;
1733     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1734     aij->nonew = nonew;
1735   }
1736   PetscCall(MatShift_Basic(Y,a));
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1741 {
1742   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1743 
1744   PetscFunctionBegin;
1745   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1746   PetscCall(MatMissingDiagonal(a->A,missing,d));
1747   if (d) {
1748     PetscInt rstart;
1749     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1750     *d += rstart/A->rmap->bs;
1751 
1752   }
1753   PetscFunctionReturn(0);
1754 }
1755 
1756 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1757 {
1758   PetscFunctionBegin;
1759   *a = ((Mat_MPISBAIJ*)A->data)->A;
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 /* -------------------------------------------------------------------*/
1764 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1765                                        MatGetRow_MPISBAIJ,
1766                                        MatRestoreRow_MPISBAIJ,
1767                                        MatMult_MPISBAIJ,
1768                                /*  4*/ MatMultAdd_MPISBAIJ,
1769                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1770                                        MatMultAdd_MPISBAIJ,
1771                                        NULL,
1772                                        NULL,
1773                                        NULL,
1774                                /* 10*/ NULL,
1775                                        NULL,
1776                                        NULL,
1777                                        MatSOR_MPISBAIJ,
1778                                        MatTranspose_MPISBAIJ,
1779                                /* 15*/ MatGetInfo_MPISBAIJ,
1780                                        MatEqual_MPISBAIJ,
1781                                        MatGetDiagonal_MPISBAIJ,
1782                                        MatDiagonalScale_MPISBAIJ,
1783                                        MatNorm_MPISBAIJ,
1784                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1785                                        MatAssemblyEnd_MPISBAIJ,
1786                                        MatSetOption_MPISBAIJ,
1787                                        MatZeroEntries_MPISBAIJ,
1788                                /* 24*/ NULL,
1789                                        NULL,
1790                                        NULL,
1791                                        NULL,
1792                                        NULL,
1793                                /* 29*/ MatSetUp_MPISBAIJ,
1794                                        NULL,
1795                                        NULL,
1796                                        MatGetDiagonalBlock_MPISBAIJ,
1797                                        NULL,
1798                                /* 34*/ MatDuplicate_MPISBAIJ,
1799                                        NULL,
1800                                        NULL,
1801                                        NULL,
1802                                        NULL,
1803                                /* 39*/ MatAXPY_MPISBAIJ,
1804                                        MatCreateSubMatrices_MPISBAIJ,
1805                                        MatIncreaseOverlap_MPISBAIJ,
1806                                        MatGetValues_MPISBAIJ,
1807                                        MatCopy_MPISBAIJ,
1808                                /* 44*/ NULL,
1809                                        MatScale_MPISBAIJ,
1810                                        MatShift_MPISBAIJ,
1811                                        NULL,
1812                                        NULL,
1813                                /* 49*/ NULL,
1814                                        NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                /* 54*/ NULL,
1819                                        NULL,
1820                                        MatSetUnfactored_MPISBAIJ,
1821                                        NULL,
1822                                        MatSetValuesBlocked_MPISBAIJ,
1823                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1824                                        NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                /* 64*/ NULL,
1829                                        NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1834                                        NULL,
1835                                        MatConvert_MPISBAIJ_Basic,
1836                                        NULL,
1837                                        NULL,
1838                                /* 74*/ NULL,
1839                                        NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                /* 79*/ NULL,
1844                                        NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        MatLoad_MPISBAIJ,
1848                                /* 84*/ NULL,
1849                                        NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                /* 89*/ NULL,
1854                                        NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                /* 94*/ NULL,
1859                                        NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        NULL,
1863                                /* 99*/ NULL,
1864                                        NULL,
1865                                        NULL,
1866                                        MatConjugate_MPISBAIJ,
1867                                        NULL,
1868                                /*104*/ NULL,
1869                                        MatRealPart_MPISBAIJ,
1870                                        MatImaginaryPart_MPISBAIJ,
1871                                        MatGetRowUpperTriangular_MPISBAIJ,
1872                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1873                                /*109*/ NULL,
1874                                        NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        MatMissingDiagonal_MPISBAIJ,
1878                                /*114*/ NULL,
1879                                        NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                /*119*/ NULL,
1884                                        NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                /*124*/ NULL,
1889                                        NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                /*129*/ NULL,
1894                                        NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                /*134*/ NULL,
1899                                        NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                /*139*/ MatSetBlockSizes_Default,
1904                                        NULL,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL,
1908                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1909                                        NULL,
1910                                        NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL
1914 };
1915 
1916 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1917 {
1918   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1919   PetscInt       i,mbs,Mbs;
1920   PetscMPIInt    size;
1921 
1922   PetscFunctionBegin;
1923   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1924   PetscCall(PetscLayoutSetUp(B->rmap));
1925   PetscCall(PetscLayoutSetUp(B->cmap));
1926   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1927   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1928   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1929 
1930   mbs = B->rmap->n/bs;
1931   Mbs = B->rmap->N/bs;
1932   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1933 
1934   B->rmap->bs = bs;
1935   b->bs2      = bs*bs;
1936   b->mbs      = mbs;
1937   b->Mbs      = Mbs;
1938   b->nbs      = B->cmap->n/bs;
1939   b->Nbs      = B->cmap->N/bs;
1940 
1941   for (i=0; i<=b->size; i++) {
1942     b->rangebs[i] = B->rmap->range[i]/bs;
1943   }
1944   b->rstartbs = B->rmap->rstart/bs;
1945   b->rendbs   = B->rmap->rend/bs;
1946 
1947   b->cstartbs = B->cmap->rstart/bs;
1948   b->cendbs   = B->cmap->rend/bs;
1949 
1950 #if defined(PETSC_USE_CTABLE)
1951   PetscCall(PetscTableDestroy(&b->colmap));
1952 #else
1953   PetscCall(PetscFree(b->colmap));
1954 #endif
1955   PetscCall(PetscFree(b->garray));
1956   PetscCall(VecDestroy(&b->lvec));
1957   PetscCall(VecScatterDestroy(&b->Mvctx));
1958   PetscCall(VecDestroy(&b->slvec0));
1959   PetscCall(VecDestroy(&b->slvec0b));
1960   PetscCall(VecDestroy(&b->slvec1));
1961   PetscCall(VecDestroy(&b->slvec1a));
1962   PetscCall(VecDestroy(&b->slvec1b));
1963   PetscCall(VecScatterDestroy(&b->sMvctx));
1964 
1965   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1966   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1967   PetscCall(MatDestroy(&b->B));
1968   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1969   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1970   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1971   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1972 
1973   if (!B->preallocated) {
1974     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1975     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1976     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1977     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1978     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1979   }
1980 
1981   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1982   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1983 
1984   B->preallocated  = PETSC_TRUE;
1985   B->was_assembled = PETSC_FALSE;
1986   B->assembled     = PETSC_FALSE;
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1991 {
1992   PetscInt       m,rstart,cend;
1993   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1994   const PetscInt *JJ    =NULL;
1995   PetscScalar    *values=NULL;
1996   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1997   PetscBool      nooffprocentries;
1998 
1999   PetscFunctionBegin;
2000   PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
2001   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
2002   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
2003   PetscCall(PetscLayoutSetUp(B->rmap));
2004   PetscCall(PetscLayoutSetUp(B->cmap));
2005   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2006   m      = B->rmap->n/bs;
2007   rstart = B->rmap->rstart/bs;
2008   cend   = B->cmap->rend/bs;
2009 
2010   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2011   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2012   for (i=0; i<m; i++) {
2013     nz = ii[i+1] - ii[i];
2014     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2015     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2016     JJ     = jj + ii[i];
2017     bd     = 0;
2018     for (j=0; j<nz; j++) {
2019       if (*JJ >= i + rstart) break;
2020       JJ++;
2021       bd++;
2022     }
2023     d  = 0;
2024     for (; j<nz; j++) {
2025       if (*JJ++ >= cend) break;
2026       d++;
2027     }
2028     d_nnz[i] = d;
2029     o_nnz[i] = nz - d - bd;
2030     nz       = nz - bd;
2031     nz_max = PetscMax(nz_max,nz);
2032   }
2033   PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2034   PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2035   PetscCall(PetscFree2(d_nnz,o_nnz));
2036 
2037   values = (PetscScalar*)V;
2038   if (!values) {
2039     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2040   }
2041   for (i=0; i<m; i++) {
2042     PetscInt          row    = i + rstart;
2043     PetscInt          ncols  = ii[i+1] - ii[i];
2044     const PetscInt    *icols = jj + ii[i];
2045     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2046       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2047       PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2048     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2049       PetscInt j;
2050       for (j=0; j<ncols; j++) {
2051         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2052         PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2053       }
2054     }
2055   }
2056 
2057   if (!V) PetscCall(PetscFree(values));
2058   nooffprocentries    = B->nooffprocentries;
2059   B->nooffprocentries = PETSC_TRUE;
2060   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2061   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2062   B->nooffprocentries = nooffprocentries;
2063 
2064   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /*MC
2069    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2070    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2071    the matrix is stored.
2072 
2073    For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian you
2074    can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE).
2075 
2076    Options Database Keys:
2077 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2078 
2079    Notes:
2080      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2081      diagonal portion of the matrix on each process must be less than or equal to its number of columns.
2082 
2083    Level: beginner
2084 
2085 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2086 M*/
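/*
   Example: a minimal sketch of assembling an MPISBAIJ matrix and marking it Hermitian for
   complex builds. The global size and the preallocation counts below are placeholders,
   chosen only for illustration.

      Mat A;
      PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
      PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100));
      PetscCall(MatSetType(A,MATMPISBAIJ));
      PetscCall(MatMPISBAIJSetPreallocation(A,1,5,NULL,2,NULL));
      PetscCall(MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE));
      ... insert entries on and above the diagonal with MatSetValues() ...
      PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/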
2087 
2088 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2089 {
2090   Mat_MPISBAIJ   *b;
2091   PetscBool      flg = PETSC_FALSE;
2092 
2093   PetscFunctionBegin;
2094   PetscCall(PetscNewLog(B,&b));
2095   B->data = (void*)b;
2096   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2097 
2098   B->ops->destroy = MatDestroy_MPISBAIJ;
2099   B->ops->view    = MatView_MPISBAIJ;
2100   B->assembled    = PETSC_FALSE;
2101   B->insertmode   = NOT_SET_VALUES;
2102 
2103   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2104   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2105 
2106   /* build local table of row and column ownerships */
2107   PetscCall(PetscMalloc1(b->size+2,&b->rangebs));
2108 
2109   /* build cache for off array entries formed */
2110   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2111 
2112   b->donotstash  = PETSC_FALSE;
2113   b->colmap      = NULL;
2114   b->garray      = NULL;
2115   b->roworiented = PETSC_TRUE;
2116 
2117   /* stuff used in block assembly */
2118   b->barray = NULL;
2119 
2120   /* stuff used for matrix vector multiply */
2121   b->lvec    = NULL;
2122   b->Mvctx   = NULL;
2123   b->slvec0  = NULL;
2124   b->slvec0b = NULL;
2125   b->slvec1  = NULL;
2126   b->slvec1a = NULL;
2127   b->slvec1b = NULL;
2128   b->sMvctx  = NULL;
2129 
2130   /* stuff for MatGetRow() */
2131   b->rowindices   = NULL;
2132   b->rowvalues    = NULL;
2133   b->getrowactive = PETSC_FALSE;
2134 
2135   /* hash table stuff */
2136   b->ht           = NULL;
2137   b->hd           = NULL;
2138   b->ht_size      = 0;
2139   b->ht_flag      = PETSC_FALSE;
2140   b->ht_fact      = 0;
2141   b->ht_total_ct  = 0;
2142   b->ht_insert_ct = 0;
2143 
2144   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2145   b->ijonly = PETSC_FALSE;
2146 
2147   b->in_loc = NULL;
2148   b->v_loc  = NULL;
2149   b->n_loc  = 0;
2150 
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
2152   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2155 #if defined(PETSC_HAVE_ELEMENTAL)
2156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
2157 #endif
2158 #if defined(PETSC_HAVE_SCALAPACK)
2159   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
2160 #endif
2161   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
2162   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));
2163 
2164   B->symmetric                   = PETSC_BOOL3_TRUE;
2165   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2166   B->symmetry_eternal            = PETSC_TRUE;
2167   B->structural_symmetry_eternal = PETSC_TRUE ;
2168 #if defined(PETSC_USE_COMPLEX)
2169   B->hermitian                   = PETSC_BOOL3_FALSE;
2170 #else
2171   B->hermitian                   = PETSC_BOOL3_TRUE;
2172 #endif
2173 
2174   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
2175   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2176   PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
2177   if (flg) {
2178     PetscReal fact = 1.39;
2179     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2180     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2181     if (fact <= 1.0) fact = 1.39;
2182     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2183     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2184   }
2185   PetscOptionsEnd();
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 /*MC
2190    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2191 
2192    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2193    and MATMPISBAIJ otherwise.
2194 
2195    Options Database Keys:
2196 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2197 
2198   Level: beginner
2199 
2200 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2201 M*/
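/*
   Example: a minimal sketch of selecting the type at run time; with -mat_type sbaij on the
   command line the matrix below becomes MATSEQSBAIJ on one process and MATMPISBAIJ
   otherwise. The global size is a placeholder.

      Mat A;
      PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
      PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100));
      PetscCall(MatSetType(A,MATSBAIJ));
      PetscCall(MatSetFromOptions(A));
      PetscCall(MatSetUp(A));
*/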
2202 
2203 /*@C
2204    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2205    the user should preallocate the matrix storage by setting the parameters
2206    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2207    performance can be increased by more than a factor of 50.
2208 
2209    Collective on Mat
2210 
2211    Input Parameters:
2212 +  B - the matrix
2213 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2214           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2215 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2216            submatrix  (same for all local rows)
2217 .  d_nnz - array containing the number of block nonzeros in the various block rows
2218            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2219            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2220            for the diagonal entry and set a value even if it is zero.
2221 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2222            submatrix (same for all local rows).
2223 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2224            off-diagonal portion of the local submatrix that is right of the diagonal
2225            (possibly different for each block row) or NULL.
2226 
2227    Options Database Keys:
2228 +   -mat_no_unroll - uses code that does not unroll the loops in the
2229                      block calculations (much slower)
2230 -   -mat_block_size - size of the blocks to use
2231 
2232    Notes:
2233 
2234    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2235    then it must be used on all processors that share the object for that argument.
2236 
2237    If the *_nnz parameter is given then the *_nz parameter is ignored.
2238 
2239    Storage Information:
2240    For a square global matrix we define each processor's diagonal portion
2241    to be its local rows and the corresponding columns (a square submatrix);
2242    each processor's off-diagonal portion encompasses the remainder of the
2243    local matrix (a rectangular submatrix).
2244 
2245    The user can specify preallocated storage for the diagonal part of
2246    the local submatrix with either d_nz or d_nnz (not both).  Set
2247    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2248    memory allocation.  Likewise, specify preallocated storage for the
2249    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2250 
2251    You can call MatGetInfo() to get information on how effective the preallocation was;
2252    for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2253    You can also run with the option -info and look for messages containing the string
2254    malloc to see if additional memory allocation was needed.
2255 
2256    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2257    the figure below we depict these three local rows and all columns (0-11).
2258 
2259 .vb
2260            0 1 2 3 4 5 6 7 8 9 10 11
2261           --------------------------
2262    row 3  |. . . d d d o o o o  o  o
2263    row 4  |. . . d d d o o o o  o  o
2264    row 5  |. . . d d d o o o o  o  o
2265           --------------------------
2266 .ve
2267 
2268    Thus, any entries in the d locations are stored in the d (diagonal)
2269    submatrix, and any entries in the o locations are stored in the
2270    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2271    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2272 
2273    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2274    plus the diagonal part of the d matrix,
2275    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2276 
2277    In general, for PDE problems in which most nonzeros are near the diagonal,
2278    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2279    or you will get TERRIBLE performance; see the users' manual chapter on
2280    matrices.
2281 
2282    Level: intermediate
2283 
2284 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2285 @*/
2286 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2287 {
2288   PetscFunctionBegin;
2289   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2290   PetscValidType(B,1);
2291   PetscValidLogicalCollectiveInt(B,bs,2);
2292   PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2293   PetscFunctionReturn(0);
2294 }
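/*
   Example: a minimal sketch of the MatCreate()/MatSetType()/MatXXXXSetPreallocation()
   paradigm with this routine; the global size, block size, and nonzero counts are
   placeholders, not recommendations.

      Mat B;
      PetscCall(MatCreate(PETSC_COMM_WORLD,&B));
      PetscCall(MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,64,64));
      PetscCall(MatSetType(B,MATMPISBAIJ));
      PetscCall(MatSetFromOptions(B));
      PetscCall(MatMPISBAIJSetPreallocation(B,2,3,NULL,1,NULL));
*/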
2295 
2296 /*@C
2297    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2298    (block compressed row).  For good matrix assembly performance
2299    the user should preallocate the matrix storage by setting the parameters
2300    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2301    performance can be increased by more than a factor of 50.
2302 
2303    Collective
2304 
2305    Input Parameters:
2306 +  comm - MPI communicator
2307 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2308           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2309 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2310            This value should be the same as the local size used in creating the
2311            y vector for the matrix-vector product y = Ax.
2312 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2313            This value should be the same as the local size used in creating the
2314            x vector for the matrix-vector product y = Ax.
2315 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2316 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2317 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2318            submatrix  (same for all local rows)
2319 .  d_nnz - array containing the number of block nonzeros in the various block rows
2320            in the upper triangular portion of the diagonal portion of the local submatrix
2321            (possibly different for each block row) or NULL.
2322            If you plan to factor the matrix you must leave room for the diagonal entry and
2323            set its value even if it is zero.
2324 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2325            submatrix (same for all local rows).
2326 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2327            off-diagonal portion of the local submatrix (possibly different for
2328            each block row) or NULL.
2329 
2330    Output Parameter:
2331 .  A - the matrix
2332 
2333    Options Database Keys:
2334 +   -mat_no_unroll - uses code that does not unroll the loops in the
2335                      block calculations (much slower)
2336 .   -mat_block_size - size of the blocks to use
2337 -   -mat_mpi - use the parallel matrix data structures even on one processor
2338                (defaults to using SeqSBAIJ format on one processor)
2339 
2340    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2341    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2342    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
2343 
2344    Notes:
2345    The number of rows and columns must be divisible by blocksize.
2346    This matrix type does not support complex Hermitian operation.
2347 
2348    The user MUST specify either the local or global matrix dimensions
2349    (possibly both).
2350 
2351    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2352    then it must be used on all processors that share the object for that argument.
2353 
2354    If the *_nnz parameter is given then the *_nz parameter is ignored.
2355 
2356    Storage Information:
2357    For a square global matrix we define each processor's diagonal portion
2358    to be its local rows and the corresponding columns (a square submatrix);
2359    each processor's off-diagonal portion encompasses the remainder of the
2360    local matrix (a rectangular submatrix).
2361 
2362    The user can specify preallocated storage for the diagonal part of
2363    the local submatrix with either d_nz or d_nnz (not both).  Set
2364    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2365    memory allocation.  Likewise, specify preallocated storage for the
2366    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2367 
2368    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2369    the figure below we depict these three local rows and all columns (0-11).
2370 
2371 .vb
2372            0 1 2 3 4 5 6 7 8 9 10 11
2373           --------------------------
2374    row 3  |. . . d d d o o o o  o  o
2375    row 4  |. . . d d d o o o o  o  o
2376    row 5  |. . . d d d o o o o  o  o
2377           --------------------------
2378 .ve
2379 
2380    Thus, any entries in the d locations are stored in the d (diagonal)
2381    submatrix, and any entries in the o locations are stored in the
2382    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2383    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2384 
2385    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2386    plus the diagonal part of the d matrix,
2387    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2388    In general, for PDE problems in which most nonzeros are near the diagonal,
2389    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2390    or you will get TERRIBLE performance; see the users' manual chapter on
2391    matrices.
2392 
2393    Level: intermediate
2394 
2395 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2396 @*/
2397 
2398 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2399 {
2400   PetscMPIInt    size;
2401 
2402   PetscFunctionBegin;
2403   PetscCall(MatCreate(comm,A));
2404   PetscCall(MatSetSizes(*A,m,n,M,N));
2405   PetscCallMPI(MPI_Comm_size(comm,&size));
2406   if (size > 1) {
2407     PetscCall(MatSetType(*A,MATMPISBAIJ));
2408     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2409   } else {
2410     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2411     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2412   }
2413   PetscFunctionReturn(0);
2414 }
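/*
   Example: a minimal sketch matching the three-row figure above with bs = 1. On 4 processes
   each rank owns 3 rows of a 12 x 12 matrix; for the rank owning rows 3-5, at most 3 block
   nonzeros per row fall in the diagonal portion and 6 in the off-diagonal portion. The
   counts are illustrative upper bounds only.

      Mat A;
      PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD,1,3,3,12,12,3,NULL,6,NULL,&A));
      ... insert entries on and above the diagonal with MatSetValues(), then assemble ...
      PetscCall(MatDestroy(&A));
*/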
2415 
2416 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2417 {
2418   Mat            mat;
2419   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2420   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2421   PetscScalar    *array;
2422 
2423   PetscFunctionBegin;
2424   *newmat = NULL;
2425 
2426   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2427   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2428   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2429   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2430   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2431 
2432   mat->factortype   = matin->factortype;
2433   mat->preallocated = PETSC_TRUE;
2434   mat->assembled    = PETSC_TRUE;
2435   mat->insertmode   = NOT_SET_VALUES;
2436 
2437   a      = (Mat_MPISBAIJ*)mat->data;
2438   a->bs2 = oldmat->bs2;
2439   a->mbs = oldmat->mbs;
2440   a->nbs = oldmat->nbs;
2441   a->Mbs = oldmat->Mbs;
2442   a->Nbs = oldmat->Nbs;
2443 
2444   a->size         = oldmat->size;
2445   a->rank         = oldmat->rank;
2446   a->donotstash   = oldmat->donotstash;
2447   a->roworiented  = oldmat->roworiented;
2448   a->rowindices   = NULL;
2449   a->rowvalues    = NULL;
2450   a->getrowactive = PETSC_FALSE;
2451   a->barray       = NULL;
2452   a->rstartbs     = oldmat->rstartbs;
2453   a->rendbs       = oldmat->rendbs;
2454   a->cstartbs     = oldmat->cstartbs;
2455   a->cendbs       = oldmat->cendbs;
2456 
2457   /* hash table stuff */
2458   a->ht           = NULL;
2459   a->hd           = NULL;
2460   a->ht_size      = 0;
2461   a->ht_flag      = oldmat->ht_flag;
2462   a->ht_fact      = oldmat->ht_fact;
2463   a->ht_total_ct  = 0;
2464   a->ht_insert_ct = 0;
2465 
2466   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
2467   if (oldmat->colmap) {
2468 #if defined(PETSC_USE_CTABLE)
2469     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2470 #else
2471     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
2472     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
2473     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
2474 #endif
2475   } else a->colmap = NULL;
2476 
2477   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2478     PetscCall(PetscMalloc1(len,&a->garray));
2479     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2480     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2481   } else a->garray = NULL;
2482 
2483   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
2484   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2485   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2486   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2487   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2488 
2489   PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
2490   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2491   PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
2492   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2493 
2494   PetscCall(VecGetLocalSize(a->slvec1,&nt));
2495   PetscCall(VecGetArray(a->slvec1,&array));
2496   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
2497   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
2498   PetscCall(VecRestoreArray(a->slvec1,&array));
2499   PetscCall(VecGetArray(a->slvec0,&array));
2500   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
2501   PetscCall(VecRestoreArray(a->slvec0,&array));
2502   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2503   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2504   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
2505   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
2506   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));
2507 
2508   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2509   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2510   a->sMvctx = oldmat->sMvctx;
2511   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));
2512 
2513   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2514   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2515   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2516   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2517   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2518   *newmat = mat;
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2523 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2524 
2525 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2526 {
2527   PetscBool      isbinary;
2528 
2529   PetscFunctionBegin;
2530   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2531   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2532   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 /*XXXXX@
2537    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2538 
2539    Input Parameters:
2540 .  mat  - the matrix
2541 .  fact - factor
2542 
2543    Not Collective on Mat, each process can have a different hash factor
2544 
2545    Level: advanced
2546 
2547   Notes:
2548    This can also be set by the command line option: -mat_use_hash_table fact
2549 
2550 .seealso: `MatSetOption()`
2551 @XXXXX*/
2552 
2553 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2554 {
2555   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2556   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2557   PetscReal      atmp;
2558   PetscReal      *work,*svalues,*rvalues;
2559   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2560   PetscMPIInt    rank,size;
2561   PetscInt       *rowners_bs,dest,count,source;
2562   PetscScalar    *va;
2563   MatScalar      *ba;
2564   MPI_Status     stat;
2565 
2566   PetscFunctionBegin;
2567   PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2568   PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
2569   PetscCall(VecGetArray(v,&va));
2570 
2571   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2572   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2573 
2574   bs  = A->rmap->bs;
2575   mbs = a->mbs;
2576   Mbs = a->Mbs;
2577   ba  = b->a;
2578   bi  = b->i;
2579   bj  = b->j;
2580 
2581   /* find ownerships */
2582   rowners_bs = A->rmap->range;
2583 
2584   /* each proc creates an array to be distributed */
2585   PetscCall(PetscCalloc1(bs*Mbs,&work));
2586 
2587   /* row_max for B */
2588   if (rank != size-1) {
2589     for (i=0; i<mbs; i++) {
2590       ncols = bi[1] - bi[0]; bi++;
2591       brow  = bs*i;
2592       for (j=0; j<ncols; j++) {
2593         bcol = bs*(*bj);
2594         for (kcol=0; kcol<bs; kcol++) {
2595           col  = bcol + kcol;                /* local col index */
2596           col += rowners_bs[rank+1];      /* global col index */
2597           for (krow=0; krow<bs; krow++) {
2598             atmp = PetscAbsScalar(*ba); ba++;
2599             row  = brow + krow;   /* local row index */
2600             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2601             if (work[col] < atmp) work[col] = atmp;
2602           }
2603         }
2604         bj++;
2605       }
2606     }
2607 
2608     /* send values to its owners */
2609     for (dest=rank+1; dest<size; dest++) {
2610       svalues = work + rowners_bs[dest];
2611       count   = rowners_bs[dest+1]-rowners_bs[dest];
2612       PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
2613     }
2614   }
2615 
2616   /* receive values */
2617   if (rank) {
2618     rvalues = work;
2619     count   = rowners_bs[rank+1]-rowners_bs[rank];
2620     for (source=0; source<rank; source++) {
2621       PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
2622       /* process values */
2623       for (i=0; i<count; i++) {
2624         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2625       }
2626     }
2627   }
2628 
2629   PetscCall(VecRestoreArray(v,&va));
2630   PetscCall(PetscFree(work));
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2635 {
2636   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2637   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2638   PetscScalar       *x,*ptr,*from;
2639   Vec               bb1;
2640   const PetscScalar *b;
2641 
2642   PetscFunctionBegin;
2643   PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2644   PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2645 
2646   if (flag == SOR_APPLY_UPPER) {
2647     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2648     PetscFunctionReturn(0);
2649   }
2650 
2651   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2652     if (flag & SOR_ZERO_INITIAL_GUESS) {
2653       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
2654       its--;
2655     }
2656 
2657     PetscCall(VecDuplicate(bb,&bb1));
2658     while (its--) {
2659 
2660       /* lower triangular part: slvec0b = - B^T*xx */
2661       PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2662 
2663       /* copy xx into slvec0a */
2664       PetscCall(VecGetArray(mat->slvec0,&ptr));
2665       PetscCall(VecGetArray(xx,&x));
2666       PetscCall(PetscArraycpy(ptr,x,bs*mbs));
2667       PetscCall(VecRestoreArray(mat->slvec0,&ptr));
2668 
2669       PetscCall(VecScale(mat->slvec0,-1.0));
2670 
2671       /* copy bb into slvec1a */
2672       PetscCall(VecGetArray(mat->slvec1,&ptr));
2673       PetscCall(VecGetArrayRead(bb,&b));
2674       PetscCall(PetscArraycpy(ptr,b,bs*mbs));
2675       PetscCall(VecRestoreArray(mat->slvec1,&ptr));
2676 
2677       /* set slvec1b = 0 */
2678       PetscCall(VecSet(mat->slvec1b,0.0));
2679 
2680       PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2681       PetscCall(VecRestoreArray(xx,&x));
2682       PetscCall(VecRestoreArrayRead(bb,&b));
2683       PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2684 
2685       /* upper triangular part: bb1 = slvec1a - B*x, i.e. bb1 = bb - (off-diagonal part of A)*x */
2686       PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));
2687 
2688       /* local diagonal sweep */
2689       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
2690     }
2691     PetscCall(VecDestroy(&bb1));
2692   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2693     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2694   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2695     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2696   } else if (flag & SOR_EISENSTAT) {
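    /* Eisenstat's trick: one local backward sweep on bb produces a partial solution in xx;
       the right-hand side is then rebuilt as bb + ((omega-2)/omega)*D*xx plus the off-diagonal
       product of xx (gathered through the split scatter), a local forward sweep on that vector
       produces xx1, and the two partial solutions are summed into xx. */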
2697     Vec               xx1;
2698     PetscBool         hasop;
2699     const PetscScalar *diag;
2700     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2701     PetscInt          i,n;
2702 
2703     if (!mat->xx1) {
2704       PetscCall(VecDuplicate(bb,&mat->xx1));
2705       PetscCall(VecDuplicate(bb,&mat->bb1));
2706     }
2707     xx1 = mat->xx1;
2708     bb1 = mat->bb1;
2709 
2710     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
2711 
2712     if (!mat->diag) {
2713       /* this cached diagonal is wrong if the same matrix is later given new numerical values */
2714       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
2715       PetscCall(MatGetDiagonal(matin,mat->diag));
2716     }
2717     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
2718 
2719     if (hasop) {
2720       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
2721       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2722     } else {
2723       /*
2724           These two lines are replaced by code that may be a bit faster for a good compiler
2725       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2726       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2727       */
2728       PetscCall(VecGetArray(mat->slvec1a,&sl));
2729       PetscCall(VecGetArrayRead(mat->diag,&diag));
2730       PetscCall(VecGetArrayRead(bb,&b));
2731       PetscCall(VecGetArray(xx,&x));
2732       PetscCall(VecGetLocalSize(xx,&n));
2733       if (omega == 1.0) {
2734         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2735         PetscCall(PetscLogFlops(2.0*n));
2736       } else {
2737         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2738         PetscCall(PetscLogFlops(3.0*n));
2739       }
2740       PetscCall(VecRestoreArray(mat->slvec1a,&sl));
2741       PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2742       PetscCall(VecRestoreArrayRead(bb,&b));
2743       PetscCall(VecRestoreArray(xx,&x));
2744     }
2745 
2746     /* multiply off-diagonal portion of matrix */
2747     PetscCall(VecSet(mat->slvec1b,0.0));
2748     PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2749     PetscCall(VecGetArray(mat->slvec0,&from));
2750     PetscCall(VecGetArray(xx,&x));
2751     PetscCall(PetscArraycpy(from,x,bs*mbs));
2752     PetscCall(VecRestoreArray(mat->slvec0,&from));
2753     PetscCall(VecRestoreArray(xx,&x));
2754     PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2755     PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2756     PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2757 
2758     /* local sweep */
2759     PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2760     PetscCall(VecAXPY(xx,1.0,xx1));
2761   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 /*@
2766      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2767          in standard CSR format.
2768 
2769    Collective
2770 
2771    Input Parameters:
2772 +  comm - MPI communicator
2773 .  bs - the block size, only a block size of 1 is supported
2774 .  m - number of local rows (Cannot be PETSC_DECIDE)
2775 .  n - This value should be the same as the local size used in creating the
2776        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2777        calculated if N is given). For square matrices n is almost always m.
2778 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2779 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2780 .   i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
2781 .   j - column indices
2782 -   a - matrix values
2783 
2784    Output Parameter:
2785 .   mat - the matrix
2786 
2787    Level: intermediate
2788 
2789    Notes:
2790        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2791      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2792      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2793 
2794        The i and j indices are 0 based; the entries of i are offsets into the local j array.
2795 
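   A sketch with illustrative values (bs = 1, two MPI processes) for the symmetric matrix
.vb
        1  2  0  0
        2  5  3  0
        0  3  6  4
        0  0  4  7
.ve
   where process 0 owns rows 0-1, process 1 owns rows 2-3, and only the upper triangle of each local row is passed:
.vb
   proc 0:  i = {0,2,4}, j = {0,1,1,2}, a = {1,2,5,3}
   proc 1:  i = {0,2,3}, j = {2,3,3},   a = {6,4,7}
   MatCreateMPISBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
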
2796 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2797           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2798 @*/
2799 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2800 {
2801   PetscFunctionBegin;
2802   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2803   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2804   PetscCall(MatCreate(comm,mat));
2805   PetscCall(MatSetSizes(*mat,m,n,M,N));
2806   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2807   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 /*@C
2812    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2813 
2814    Collective
2815 
2816    Input Parameters:
2817 +  B - the matrix
2818 .  bs - the block size
2819 .  i - the indices into j for the start of each local row (starts with zero)
2820 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2821 -  v - optional values in the matrix
2822 
2823    Level: advanced
2824 
2825    Notes:
2826    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2827    and usually the numerical values as well
2828 
2829    Any entries below the diagonal are ignored
2830 
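   A minimal call-sequence sketch (the CSR arrays i, j, and v are assumed to be set up as described above):
.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);
.ve
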
2831 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2832 @*/
2833 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2834 {
2835   PetscFunctionBegin;
2836   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2837   PetscFunctionReturn(0);
2838 }
2839 
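/* Builds (or, for MAT_REUSE_MATRIX, refills) an SBAIJ matrix on comm whose local rows are the rows
   of this process's sequential SBAIJ matrix inmat; n is the local column size of the result (or
   PETSC_DECIDE).  The symbolic phase computes the block preallocation from the upper-triangular
   rows of inmat, and the numeric phase copies the values row by row. */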
2840 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2841 {
2842   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2843   PetscInt       *indx;
2844   PetscScalar    *values;
2845 
2846   PetscFunctionBegin;
2847   PetscCall(MatGetSize(inmat,&m,&N));
2848   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2849     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2850     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2851     PetscInt       *bindx,rmax=a->rmax,j;
2852     PetscMPIInt    rank,size;
2853 
2854     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2855     mbs = m/bs; Nbs = N/cbs;
2856     if (n == PETSC_DECIDE) {
2857       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2858     }
2859     nbs = n/cbs;
2860 
2861     PetscCall(PetscMalloc1(rmax,&bindx));
2862     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* macro that defines __rstart and __end, which are used below */
2863 
2864     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2865     PetscCallMPI(MPI_Comm_size(comm,&size));
2866     if (rank == size-1) {
2867       /* Check sum(nbs) = Nbs */
2868       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2869     }
2870 
2871     rstart = __rstart; /* block rstart of *outmat; see the MatPreallocateBegin macro */
2872     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
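    /* MatGetRow_SeqSBAIJ() returns point (non-blocked) column indices; convert them to block
       columns in bindx so that MatPreallocateSet() counts block nonzeros per block row */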
2873     for (i=0; i<mbs; i++) {
2874       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2875       nnz  = nnz/bs;
2876       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2877       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2878       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2879     }
2880     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2881     PetscCall(PetscFree(bindx));
2882 
2883     PetscCall(MatCreate(comm,outmat));
2884     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2885     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2886     PetscCall(MatSetType(*outmat,MATSBAIJ));
2887     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2888     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2889     MatPreallocateEnd(dnz,onz);
2890   }
2891 
2892   /* numeric phase */
2893   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2894   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2895 
2896   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2897   for (i=0; i<m; i++) {
2898     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2899     Ii   = i + rstart;
2900     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2901     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2902   }
2903   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2904   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2905   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2906   PetscFunctionReturn(0);
2907 }
2908