xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4ffacfe27a72f4cdf51b68a3bbb6aed96040fb2f)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
24   PetscValidLogicalCollectiveInt(B,nm,2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
27     PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
28     PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B,fill,5);
31   PetscCall(MatGetBlockSize(B,&bs));
32   PetscCall(MatGetSize(B,&M,&N));
33   PetscCall(MatGetLocalSize(B,&m,&n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
35   PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator,bs));
37   PetscCall(MatSetSizes(preallocator,m,n,M,N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt          ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
47       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
48       if (symm && symm[i]) {
49         PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
50       }
51       PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
52     }
53   }
54   PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
55   PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
56   PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
57   PetscCall(MatDestroy(&preallocator));
58   PetscFunctionReturn(0);
59 }
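
/*
   Editor's note (illustrative sketch, not part of the original source): the helper above relies on
   the MATPREALLOCATOR pattern, which can also be used directly.  "B" is any matrix whose
   preallocation is to be computed; m, n, M, N, bs and the insertion loop are placeholders.

     Mat prealloc;
     PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&prealloc));
     PetscCall(MatSetType(prealloc,MATPREALLOCATOR));
     PetscCall(MatSetBlockSize(prealloc,bs));
     PetscCall(MatSetSizes(prealloc,m,n,M,N));
     PetscCall(MatSetUp(prealloc));
     ... call MatSetValues(prealloc,...) with exactly the (row,col) pairs that will later go into B ...
     PetscCall(MatAssemblyBegin(prealloc,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(prealloc,MAT_FINAL_ASSEMBLY));
     ... fill == PETSC_TRUE below also zeros the preallocated slots of B ...
     PetscCall(MatPreallocatorPreallocate(prealloc,PETSC_TRUE,B));
     PetscCall(MatDestroy(&prealloc));
*/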
60 
61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
62 {
63   Mat            B;
64   PetscInt       r;
65 
66   PetscFunctionBegin;
67   if (reuse != MAT_REUSE_MATRIX) {
68     PetscBool symm = PETSC_TRUE,isdense;
69     PetscInt  bs;
70 
71     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
72     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
73     PetscCall(MatSetType(B,newtype));
74     PetscCall(MatGetBlockSize(A,&bs));
75     PetscCall(MatSetBlockSize(B,bs));
76     PetscCall(PetscLayoutSetUp(B->rmap));
77     PetscCall(PetscLayoutSetUp(B->cmap));
78     PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
79     if (!isdense) {
80       PetscCall(MatGetRowUpperTriangular(A));
81       PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
82       PetscCall(MatRestoreRowUpperTriangular(A));
83     } else {
84       PetscCall(MatSetUp(B));
85     }
86   } else {
87     B    = *newmat;
88     PetscCall(MatZeroEntries(B));
89   }
90 
91   PetscCall(MatGetRowUpperTriangular(A));
92   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
93     PetscInt          ncols;
94     const PetscInt    *row;
95     const PetscScalar *vals;
96 
97     PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
98     PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
99 #if defined(PETSC_USE_COMPLEX)
100     if (A->hermitian) {
101       PetscInt i;
102       for (i = 0; i < ncols; i++) {
103         PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
104       }
105     } else {
106       PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
107     }
108 #else
109     PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
110 #endif
111     PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
112   }
113   PetscCall(MatRestoreRowUpperTriangular(A));
114   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
116 
117   if (reuse == MAT_INPLACE_MATRIX) {
118     PetscCall(MatHeaderReplace(A,&B));
119   } else {
120     *newmat = B;
121   }
122   PetscFunctionReturn(0);
123 }
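
/*
   Editor's note (illustrative sketch, not part of the original source): from the user side the
   conversion above is reached through MatConvert(); "A" is an assembled MATMPISBAIJ matrix.
   The stored upper triangle is expanded so the result holds the full symmetric matrix.

     Mat Afull;
     PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Afull));
     ... use Afull with solvers or operations that do not support SBAIJ ...
     PetscCall(MatDestroy(&Afull));
*/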
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
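
/*
   Editor's note (illustrative calling sequence, not part of the original source): MatStoreValues()
   and MatRetrieveValues() reuse a fixed nonzero pattern; the matrix must already be assembled and
   new nonzero locations must be disallowed first.

     PetscCall(MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE));
     PetscCall(MatStoreValues(mat));
     ... overwrite the numerical values, e.g. for a modified system with the same pattern ...
     PetscCall(MatRetrieveValues(mat));
*/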
144 
145 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
146   { \
147     brow = row/bs;  \
148     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149     rmax = aimax[brow]; nrow = ailen[brow]; \
150     bcol = col/bs; \
151     ridx = row % bs; cidx = col % bs; \
152     low  = 0; high = nrow; \
153     while (high-low > 3) { \
154       t = (low+high)/2; \
155       if (rp[t] > bcol) high = t; \
156       else              low  = t; \
157     } \
158     for (_i=low; _i<high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2*_i + bs*cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value;  \
163         else                    *bap  = value;  \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
170     N = nrow++ - 1;  \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
173     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
174     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
175     rp[_i]                      = bcol;  \
176     ap[bs2*_i + bs*cidx + ridx] = value;  \
177     A->nonzerostate++;\
178 a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
183   { \
184     brow = row/bs;  \
185     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
186     rmax = bimax[brow]; nrow = bilen[brow]; \
187     bcol = col/bs; \
188     ridx = row % bs; cidx = col % bs; \
189     low  = 0; high = nrow; \
190     while (high-low > 3) { \
191       t = (low+high)/2; \
192       if (rp[t] > bcol) high = t; \
193       else              low  = t; \
194     } \
195     for (_i=low; _i<high; _i++) { \
196       if (rp[_i] > bcol) break; \
197       if (rp[_i] == bcol) { \
198         bap = ap + bs2*_i + bs*cidx + ridx; \
199         if (addv == ADD_VALUES) *bap += value;  \
200         else                    *bap  = value;  \
201         goto b_noinsert; \
202       } \
203     } \
204     if (b->nonew == 1) goto b_noinsert; \
205     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
206     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
207     N = nrow++ - 1;  \
208     /* shift up all the later entries in this row */ \
209     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
210     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
211     PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
212     rp[_i]                      = bcol;  \
213     ap[bs2*_i + bs*cidx + ridx] = value;  \
214     B->nonzerostate++;\
215 b_noinsert:; \
216     bilen[brow] = nrow; \
217   }
218 
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j input by the user is ignored or generates an error
221 */
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some variables required by the macros below */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) {
285           continue;
286         } else {
287           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
288           /* off-diag entry (B) */
289           if (mat->was_assembled) {
290             if (!baij->colmap) {
291               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
292             }
293 #if defined(PETSC_USE_CTABLE)
294             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
295             col  = col - 1;
296 #else
297             col = baij->colmap[in[j]/bs] - 1;
298 #endif
299             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
300               PetscCall(MatDisAssemble_MPISBAIJ(mat));
301               col  =  in[j];
302               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
303               B    = baij->B;
304               b    = (Mat_SeqBAIJ*)(B)->data;
305               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
306               ba   = b->a;
307             } else col += in[j]%bs;
308           } else col = in[j];
309           if (roworiented) value = v[i*n+j];
310           else             value = v[i+j*m];
311           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
312           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
313         }
314       }
315     } else {  /* off processor entry */
316       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
317       if (!baij->donotstash) {
318         mat->assembled = PETSC_FALSE;
319         n_loc          = 0;
320         for (j=0; j<n; j++) {
321           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
322           in_loc[n_loc] = in[j];
323           if (roworiented) {
324             v_loc[n_loc] = v[i*n+j];
325           } else {
326             v_loc[n_loc] = v[j*m+i];
327           }
328           n_loc++;
329         }
330         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
331       }
332     }
333   }
334   PetscFunctionReturn(0);
335 }
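
/*
   Editor's note (illustrative sketch, not part of the original source): only entries whose block
   row is <= the block column are stored by the routine above; lower-triangular input either
   triggers the error or is dropped once MAT_IGNORE_LOWER_TRIANGULAR is set.  The index "i" and the
   values are placeholders, and bs = 1 is assumed for simplicity.

     PetscCall(MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
     PetscCall(MatSetValue(mat,i,i,4.0,INSERT_VALUES));        diagonal entry, always kept
     PetscCall(MatSetValue(mat,i,i+1,-1.0,INSERT_VALUES));     upper triangle, kept
     PetscCall(MatSetValue(mat,i+1,i,-1.0,INSERT_VALUES));     lower triangle, silently ignored
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/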
336 
337 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
338 {
339   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
340   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
341   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
342   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
343   PetscBool         roworiented=a->roworiented;
344   const PetscScalar *value     = v;
345   MatScalar         *ap,*aa = a->a,*bap;
346 
347   PetscFunctionBegin;
348   if (col < row) {
349     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
350     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
351   }
352   rp   = aj + ai[row];
353   ap   = aa + bs2*ai[row];
354   rmax = imax[row];
355   nrow = ailen[row];
356   value = v;
357   low   = 0;
358   high  = nrow;
359 
360   while (high-low > 7) {
361     t = (low+high)/2;
362     if (rp[t] > col) high = t;
363     else             low  = t;
364   }
365   for (i=low; i<high; i++) {
366     if (rp[i] > col) break;
367     if (rp[i] == col) {
368       bap = ap +  bs2*i;
369       if (roworiented) {
370         if (is == ADD_VALUES) {
371           for (ii=0; ii<bs; ii++) {
372             for (jj=ii; jj<bs2; jj+=bs) {
373               bap[jj] += *value++;
374             }
375           }
376         } else {
377           for (ii=0; ii<bs; ii++) {
378             for (jj=ii; jj<bs2; jj+=bs) {
379               bap[jj] = *value++;
380             }
381           }
382         }
383       } else {
384         if (is == ADD_VALUES) {
385           for (ii=0; ii<bs; ii++) {
386             for (jj=0; jj<bs; jj++) {
387               *bap++ += *value++;
388             }
389           }
390         } else {
391           for (ii=0; ii<bs; ii++) {
392             for (jj=0; jj<bs; jj++) {
393               *bap++  = *value++;
394             }
395           }
396         }
397       }
398       goto noinsert2;
399     }
400   }
401   if (nonew == 1) goto noinsert2;
402   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block-indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into the matrix", orow, ocol);
403   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
404   N = nrow++ - 1; high++;
405   /* shift up all the later entries in this row */
406   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
407   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
408   rp[i] = col;
409   bap   = ap +  bs2*i;
410   if (roworiented) {
411     for (ii=0; ii<bs; ii++) {
412       for (jj=ii; jj<bs2; jj+=bs) {
413         bap[jj] = *value++;
414       }
415     }
416   } else {
417     for (ii=0; ii<bs; ii++) {
418       for (jj=0; jj<bs; jj++) {
419         *bap++ = *value++;
420       }
421     }
422   }
423   noinsert2:;
424   ailen[row] = nrow;
425   PetscFunctionReturn(0);
426 }
427 
428 /*
429    This routine is exactly duplicated in mpibaij.c
430 */
431 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
432 {
433   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
434   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
435   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
436   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
437   PetscBool         roworiented=a->roworiented;
438   const PetscScalar *value     = v;
439   MatScalar         *ap,*aa = a->a,*bap;
440 
441   PetscFunctionBegin;
442   rp   = aj + ai[row];
443   ap   = aa + bs2*ai[row];
444   rmax = imax[row];
445   nrow = ailen[row];
446   low  = 0;
447   high = nrow;
448   value = v;
449   while (high-low > 7) {
450     t = (low+high)/2;
451     if (rp[t] > col) high = t;
452     else             low  = t;
453   }
454   for (i=low; i<high; i++) {
455     if (rp[i] > col) break;
456     if (rp[i] == col) {
457       bap = ap +  bs2*i;
458       if (roworiented) {
459         if (is == ADD_VALUES) {
460           for (ii=0; ii<bs; ii++) {
461             for (jj=ii; jj<bs2; jj+=bs) {
462               bap[jj] += *value++;
463             }
464           }
465         } else {
466           for (ii=0; ii<bs; ii++) {
467             for (jj=ii; jj<bs2; jj+=bs) {
468               bap[jj] = *value++;
469             }
470           }
471         }
472       } else {
473         if (is == ADD_VALUES) {
474           for (ii=0; ii<bs; ii++,value+=bs) {
475             for (jj=0; jj<bs; jj++) {
476               bap[jj] += value[jj];
477             }
478             bap += bs;
479           }
480         } else {
481           for (ii=0; ii<bs; ii++,value+=bs) {
482             for (jj=0; jj<bs; jj++) {
483               bap[jj]  = value[jj];
484             }
485             bap += bs;
486           }
487         }
488       }
489       goto noinsert2;
490     }
491   }
492   if (nonew == 1) goto noinsert2;
493   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block-indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into the matrix", orow, ocol);
494   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
495   N = nrow++ - 1; high++;
496   /* shift up all the later entries in this row */
497   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
498   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
499   rp[i] = col;
500   bap   = ap +  bs2*i;
501   if (roworiented) {
502     for (ii=0; ii<bs; ii++) {
503       for (jj=ii; jj<bs2; jj+=bs) {
504         bap[jj] = *value++;
505       }
506     }
507   } else {
508     for (ii=0; ii<bs; ii++) {
509       for (jj=0; jj<bs; jj++) {
510         *bap++ = *value++;
511       }
512     }
513   }
514   noinsert2:;
515   ailen[row] = nrow;
516   PetscFunctionReturn(0);
517 }
518 
519 /*
520     This routine could be optimized by removing the need for the block copy below and passing stride information
521   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
522 */
523 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
524 {
525   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
526   const MatScalar *value;
527   MatScalar       *barray     =baij->barray;
528   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
529   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
530   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
531   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
532 
533   PetscFunctionBegin;
534   if (!barray) {
535     PetscCall(PetscMalloc1(bs2,&barray));
536     baij->barray = barray;
537   }
538 
539   if (roworiented) {
540     stepval = (n-1)*bs;
541   } else {
542     stepval = (m-1)*bs;
543   }
544   for (i=0; i<m; i++) {
545     if (im[i] < 0) continue;
546     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
547     if (im[i] >= rstart && im[i] < rend) {
548       row = im[i] - rstart;
549       for (j=0; j<n; j++) {
550         if (im[i] > in[j]) {
551           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
552           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
553         }
554         /* If n = 1 (row-oriented) or m = 1 (column-oriented) the block is contiguous in v, so a copy is not required */
555         if ((roworiented) && (n == 1)) {
556           barray = (MatScalar*) v + i*bs2;
557         } else if ((!roworiented) && (m == 1)) {
558           barray = (MatScalar*) v + j*bs2;
559         } else { /* Here a copy is required */
560           if (roworiented) {
561             value = v + i*(stepval+bs)*bs + j*bs;
562           } else {
563             value = v + j*(stepval+bs)*bs + i*bs;
564           }
565           for (ii=0; ii<bs; ii++,value+=stepval) {
566             for (jj=0; jj<bs; jj++) {
567               *barray++ = *value++;
568             }
569           }
570           barray -=bs2;
571         }
572 
573         if (in[j] >= cstart && in[j] < cend) {
574           col  = in[j] - cstart;
575           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
576         } else if (in[j] < 0) {
577           continue;
578         } else {
579           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
580           if (mat->was_assembled) {
581             if (!baij->colmap) {
582               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
583             }
584 
585 #if defined(PETSC_USE_DEBUG)
586 #if defined(PETSC_USE_CTABLE)
587             { PetscInt data;
588               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
589               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590             }
591 #else
592             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
593 #endif
594 #endif
595 #if defined(PETSC_USE_CTABLE)
596             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
597             col  = (col - 1)/bs;
598 #else
599             col = (baij->colmap[in[j]] - 1)/bs;
600 #endif
601             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
602               PetscCall(MatDisAssemble_MPISBAIJ(mat));
603               col  = in[j];
604             }
605           } else col = in[j];
606           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
607         }
608       }
609     } else {
610       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
611       if (!baij->donotstash) {
612         if (roworiented) {
613           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
614         } else {
615           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
616         }
617       }
618     }
619   }
620   PetscFunctionReturn(0);
621 }
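
/*
   Editor's note (illustrative sketch, not part of the original source): the routine above takes
   BLOCK indices, and v holds one dense bs x bs block per (block row, block column) pair,
   row-oriented by default.  A block size of 2 and the indices below are placeholders; as with the
   point version, only blocks with block row <= block column are kept.

     PetscScalar block[4] = {4.0,-1.0,-1.0,4.0};
     PetscInt    ib = 0,jb = 1;
     PetscCall(MatSetValuesBlocked(mat,1,&ib,1,&jb,block,INSERT_VALUES));
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/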
622 
623 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
624 {
625   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
626   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
627   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
628 
629   PetscFunctionBegin;
630   for (i=0; i<m; i++) {
631     if (idxm[i] < 0) continue; /* negative row */
632     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
633     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
634       row = idxm[i] - bsrstart;
635       for (j=0; j<n; j++) {
636         if (idxn[j] < 0) continue; /* negative column */
637         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
638         if (idxn[j] >= bscstart && idxn[j] < bscend) {
639           col  = idxn[j] - bscstart;
640           PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
641         } else {
642           if (!baij->colmap) {
643             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
644           }
645 #if defined(PETSC_USE_CTABLE)
646           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
647           data--;
648 #else
649           data = baij->colmap[idxn[j]/bs]-1;
650 #endif
651           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
652           else {
653             col  = data + idxn[j]%bs;
654             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
655           }
656         }
657       }
658     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
664 {
665   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
666   PetscReal      sum[2],*lnorm2;
667 
668   PetscFunctionBegin;
669   if (baij->size == 1) {
670     PetscCall(MatNorm(baij->A,type,norm));
671   } else {
672     if (type == NORM_FROBENIUS) {
673       PetscCall(PetscMalloc1(2,&lnorm2));
674       PetscCall(MatNorm(baij->A,type,lnorm2));
675       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
676       PetscCall(MatNorm(baij->B,type,lnorm2));
677       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
678       PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
679       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
680       PetscCall(PetscFree(lnorm2));
681     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
682       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
683       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
684       PetscReal    *rsum,*rsum2,vabs;
685       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
686       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
687       MatScalar    *v;
688 
689       PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
690       PetscCall(PetscArrayzero(rsum,mat->cmap->N));
691       /* Amat */
692       v = amat->a; jj = amat->j;
693       for (brow=0; brow<mbs; brow++) {
694         grow = bs*(rstart + brow);
695         nz   = amat->i[brow+1] - amat->i[brow];
696         for (bcol=0; bcol<nz; bcol++) {
697           gcol = bs*(rstart + *jj); jj++;
698           for (col=0; col<bs; col++) {
699             for (row=0; row<bs; row++) {
700               vabs            = PetscAbsScalar(*v); v++;
701               rsum[gcol+col] += vabs;
702               /* non-diagonal block */
703               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
704             }
705           }
706         }
707         PetscCall(PetscLogFlops(nz*bs*bs));
708       }
709       /* Bmat */
710       v = bmat->a; jj = bmat->j;
711       for (brow=0; brow<mbs; brow++) {
712         grow = bs*(rstart + brow);
713         nz = bmat->i[brow+1] - bmat->i[brow];
714         for (bcol=0; bcol<nz; bcol++) {
715           gcol = bs*garray[*jj]; jj++;
716           for (col=0; col<bs; col++) {
717             for (row=0; row<bs; row++) {
718               vabs            = PetscAbsScalar(*v); v++;
719               rsum[gcol+col] += vabs;
720               rsum[grow+row] += vabs;
721             }
722           }
723         }
724         PetscCall(PetscLogFlops(nz*bs*bs));
725       }
726       PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
727       *norm = 0.0;
728       for (col=0; col<mat->cmap->N; col++) {
729         if (rsum2[col] > *norm) *norm = rsum2[col];
730       }
731       PetscCall(PetscFree2(rsum,rsum2));
732     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
733   }
734   PetscFunctionReturn(0);
735 }
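
/*
   Editor's note: in the Frobenius branch above the result is sqrt(||A||_F^2 + 2*||B||_F^2) because
   every entry stored in the off-diagonal part B also appears, transposed, in the unstored lower
   triangle of the global matrix.  Usage is the standard interface (sketch only):

     PetscReal nrm;
     PetscCall(MatNorm(mat,NORM_FROBENIUS,&nrm));
*/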
736 
737 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
738 {
739   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
740   PetscInt       nstash,reallocs;
741 
742   PetscFunctionBegin;
743   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
744 
745   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
746   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
747   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
748   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
749   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
750   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
751   PetscFunctionReturn(0);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
755 {
756   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
757   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
758   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
759   PetscInt       *row,*col;
760   PetscBool      other_disassembled;
761   PetscMPIInt    n;
762   PetscBool      r1,r2,r3;
763   MatScalar      *val;
764 
765   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
766   PetscFunctionBegin;
767   if (!baij->donotstash &&  !mat->nooffprocentries) {
768     while (1) {
769       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
770       if (!flg) break;
771 
772       for (i=0; i<n;) {
773         /* Now identify the consecutive vals belonging to the same row */
774         for (j=i,rstart=row[j]; j<n; j++) {
775           if (row[j] != rstart) break;
776         }
777         if (j < n) ncols = j-i;
778         else       ncols = n-i;
779         /* Now assemble all these values with a single function call */
780         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
781         i    = j;
782       }
783     }
784     PetscCall(MatStashScatterEnd_Private(&mat->stash));
785     /* Now process the block-stash. Since the values are stashed column-oriented,
786        temporarily clear the roworiented flags so the blocks are interpreted column-oriented,
787        and restore the original flags after the MatSetValuesBlocked() calls */
788     r1 = baij->roworiented;
789     r2 = a->roworiented;
790     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
791 
792     baij->roworiented = PETSC_FALSE;
793     a->roworiented    = PETSC_FALSE;
794 
795     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
796     while (1) {
797       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
798       if (!flg) break;
799 
800       for (i=0; i<n;) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
808         i    = j;
809       }
810     }
811     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
812 
813     baij->roworiented = r1;
814     a->roworiented    = r2;
815 
816     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
817   }
818 
819   PetscCall(MatAssemblyBegin(baij->A,mode));
820   PetscCall(MatAssemblyEnd(baij->A,mode));
821 
822   /* determine if any processor has disassembled, if so we must
823      also disassemble ourselves, in order that we may reassemble. */
824   /*
825      if nonzero structure of submatrix B cannot change then we know that
826      no processor disassembled, so we can skip this step
827   */
828   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
829     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
830     if (mat->was_assembled && !other_disassembled) {
831       PetscCall(MatDisAssemble_MPISBAIJ(mat));
832     }
833   }
834 
835   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
836     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
837   }
838   PetscCall(MatAssemblyBegin(baij->B,mode));
839   PetscCall(MatAssemblyEnd(baij->B,mode));
840 
841   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
842 
843   baij->rowvalues = NULL;
844 
845   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
846   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
847     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
848     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
849   }
850   PetscFunctionReturn(0);
851 }
852 
853 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
854 #include <petscdraw.h>
855 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
856 {
857   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
858   PetscInt          bs   = mat->rmap->bs;
859   PetscMPIInt       rank = baij->rank;
860   PetscBool         iascii,isdraw;
861   PetscViewer       sviewer;
862   PetscViewerFormat format;
863 
864   PetscFunctionBegin;
865   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
866   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
867   if (iascii) {
868     PetscCall(PetscViewerGetFormat(viewer,&format));
869     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
870       MatInfo info;
871       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
872       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
873       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
874       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
875       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
876       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
877       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
878       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
879       PetscCall(PetscViewerFlush(viewer));
880       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
881       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
882       PetscCall(VecScatterView(baij->Mvctx,viewer));
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_INFO) {
885       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
886       PetscFunctionReturn(0);
887     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
888       PetscFunctionReturn(0);
889     }
890   }
891 
892   if (isdraw) {
893     PetscDraw draw;
894     PetscBool isnull;
895     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
896     PetscCall(PetscDrawIsNull(draw,&isnull));
897     if (isnull) PetscFunctionReturn(0);
898   }
899 
900   {
901     /* assemble the entire matrix onto first processor. */
902     Mat          A;
903     Mat_SeqSBAIJ *Aloc;
904     Mat_SeqBAIJ  *Bloc;
905     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
906     MatScalar    *a;
907     const char   *matname;
908 
909     /* Should this be the same type as mat? */
910     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
911     if (rank == 0) {
912       PetscCall(MatSetSizes(A,M,N,M,N));
913     } else {
914       PetscCall(MatSetSizes(A,0,0,M,N));
915     }
916     PetscCall(MatSetType(A,MATMPISBAIJ));
917     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
918     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
919     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
923     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     PetscCall(PetscMalloc1(bs,&rvals));
925 
926     for (i=0; i<mbs; i++) {
927       rvals[0] = bs*(baij->rstartbs + i);
928       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
929       for (j=ai[i]; j<ai[i+1]; j++) {
930         col = (baij->cstartbs+aj[j])*bs;
931         for (k=0; k<bs; k++) {
932           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
933           col++;
934           a += bs;
935         }
936       }
937     }
938     /* copy over the B part */
939     Bloc = (Mat_SeqBAIJ*)baij->B->data;
940     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
941     for (i=0; i<mbs; i++) {
942 
943       rvals[0] = bs*(baij->rstartbs + i);
944       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
945       for (j=ai[i]; j<ai[i+1]; j++) {
946         col = baij->garray[aj[j]]*bs;
947         for (k=0; k<bs; k++) {
948           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
949           col++;
950           a += bs;
951         }
952       }
953     }
954     PetscCall(PetscFree(rvals));
955     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
956     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
957     /*
958        Every process has to participate in drawing the matrix since the graphics waits
959        are synchronized across all processors that share the PetscDraw object
960     */
961     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
962     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
963     if (rank == 0) {
964       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
965       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
966     }
967     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
968     PetscCall(PetscViewerFlush(viewer));
969     PetscCall(MatDestroy(&A));
970   }
971   PetscFunctionReturn(0);
972 }
973 
974 /* Used for both MPIBAIJ and MPISBAIJ matrices */
975 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
976 
977 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
978 {
979   PetscBool      iascii,isdraw,issocket,isbinary;
980 
981   PetscFunctionBegin;
982   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
983   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
984   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
985   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
986   if (iascii || isdraw || issocket) {
987     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
988   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
993 {
994   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
995 
996   PetscFunctionBegin;
997 #if defined(PETSC_USE_LOG)
998   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
999 #endif
1000   PetscCall(MatStashDestroy_Private(&mat->stash));
1001   PetscCall(MatStashDestroy_Private(&mat->bstash));
1002   PetscCall(MatDestroy(&baij->A));
1003   PetscCall(MatDestroy(&baij->B));
1004 #if defined(PETSC_USE_CTABLE)
1005   PetscCall(PetscTableDestroy(&baij->colmap));
1006 #else
1007   PetscCall(PetscFree(baij->colmap));
1008 #endif
1009   PetscCall(PetscFree(baij->garray));
1010   PetscCall(VecDestroy(&baij->lvec));
1011   PetscCall(VecScatterDestroy(&baij->Mvctx));
1012   PetscCall(VecDestroy(&baij->slvec0));
1013   PetscCall(VecDestroy(&baij->slvec0b));
1014   PetscCall(VecDestroy(&baij->slvec1));
1015   PetscCall(VecDestroy(&baij->slvec1a));
1016   PetscCall(VecDestroy(&baij->slvec1b));
1017   PetscCall(VecScatterDestroy(&baij->sMvctx));
1018   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1019   PetscCall(PetscFree(baij->barray));
1020   PetscCall(PetscFree(baij->hd));
1021   PetscCall(VecDestroy(&baij->diag));
1022   PetscCall(VecDestroy(&baij->bb1));
1023   PetscCall(VecDestroy(&baij->xx1));
1024 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1025   PetscCall(PetscFree(baij->setvaluescopy));
1026 #endif
1027   PetscCall(PetscFree(baij->in_loc));
1028   PetscCall(PetscFree(baij->v_loc));
1029   PetscCall(PetscFree(baij->rangebs));
1030   PetscCall(PetscFree(mat->data));
1031 
1032   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1034   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1035   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1036   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1037 #if defined(PETSC_HAVE_ELEMENTAL)
1038   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1039 #endif
1040 #if defined(PETSC_HAVE_SCALAPACK)
1041   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1042 #endif
1043   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1044   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1049 {
1050   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1051   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1052   PetscScalar       *from;
1053   const PetscScalar *x;
1054 
1055   PetscFunctionBegin;
1056   /* diagonal part */
1057   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1058   PetscCall(VecSet(a->slvec1b,0.0));
1059 
1060   /* subdiagonal part */
1061   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1062   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1063 
1064   /* copy x into the vec slvec0 */
1065   PetscCall(VecGetArray(a->slvec0,&from));
1066   PetscCall(VecGetArrayRead(xx,&x));
1067 
1068   PetscCall(PetscArraycpy(from,x,bs*mbs));
1069   PetscCall(VecRestoreArray(a->slvec0,&from));
1070   PetscCall(VecRestoreArrayRead(xx,&x));
1071 
1072   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1073   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1074   /* superdiagonal part */
1075   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1082   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1083   PetscScalar       *from;
1084   const PetscScalar *x;
1085 
1086   PetscFunctionBegin;
1087   /* diagonal part */
1088   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1089   PetscCall(VecSet(a->slvec1b,0.0));
1090 
1091   /* subdiagonal part */
1092   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1093 
1094   /* copy x into the vec slvec0 */
1095   PetscCall(VecGetArray(a->slvec0,&from));
1096   PetscCall(VecGetArrayRead(xx,&x));
1097 
1098   PetscCall(PetscArraycpy(from,x,bs*mbs));
1099   PetscCall(VecRestoreArray(a->slvec0,&from));
1100   PetscCall(VecRestoreArrayRead(xx,&x));
1101 
1102   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1103   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1104   /* superdiagonal part */
1105   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1112   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1113   PetscScalar       *from,zero=0.0;
1114   const PetscScalar *x;
1115 
1116   PetscFunctionBegin;
1117   /* diagonal part */
1118   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1119   PetscCall(VecSet(a->slvec1b,zero));
1120 
1121   /* subdiagonal part */
1122   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1123   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1124 
1125   /* copy x into the vec slvec0 */
1126   PetscCall(VecGetArray(a->slvec0,&from));
1127   PetscCall(VecGetArrayRead(xx,&x));
1128   PetscCall(PetscArraycpy(from,x,bs*mbs));
1129   PetscCall(VecRestoreArray(a->slvec0,&from));
1130 
1131   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1132   PetscCall(VecRestoreArrayRead(xx,&x));
1133   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1134 
1135   /* superdiagonal part */
1136   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141 {
1142   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1143   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1144   PetscScalar       *from,zero=0.0;
1145   const PetscScalar *x;
1146 
1147   PetscFunctionBegin;
1148   /* diagonal part */
1149   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1150   PetscCall(VecSet(a->slvec1b,zero));
1151 
1152   /* subdiagonal part */
1153   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1154 
1155   /* copy x into the vec slvec0 */
1156   PetscCall(VecGetArray(a->slvec0,&from));
1157   PetscCall(VecGetArrayRead(xx,&x));
1158   PetscCall(PetscArraycpy(from,x,bs*mbs));
1159   PetscCall(VecRestoreArray(a->slvec0,&from));
1160 
1161   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1162   PetscCall(VecRestoreArrayRead(xx,&x));
1163   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1164 
1165   /* superdiagonal part */
1166   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 /*
1171   This only works correctly for square matrices where the subblock A->A is the
1172    diagonal block
1173 */
1174 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1175 {
1176   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1177 
1178   PetscFunctionBegin;
1179   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1180   PetscCall(MatGetDiagonal(a->A,v));
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1185 {
1186   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(MatScale(a->A,aa));
1190   PetscCall(MatScale(a->B,aa));
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1195 {
1196   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1197   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1198   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1199   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1200   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1201 
1202   PetscFunctionBegin;
1203   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1204   mat->getrowactive = PETSC_TRUE;
1205 
1206   if (!mat->rowvalues && (idx || v)) {
1207     /*
1208         allocate enough space to hold information from the longest row.
1209     */
1210     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1211     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1212     PetscInt     max = 1,mbs = mat->mbs,tmp;
1213     for (i=0; i<mbs; i++) {
1214       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1215       if (max < tmp) max = tmp;
1216     }
1217     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1218   }
1219 
1220   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1221   lrow = row - brstart;  /* local row index */
1222 
1223   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1224   if (!v)   {pvA = NULL; pvB = NULL;}
1225   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1226   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1227   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1228   nztot = nzA + nzB;
1229 
1230   cmap = mat->garray;
1231   if (v  || idx) {
1232     if (nztot) {
1233       /* Sort by increasing column numbers, assuming A and B already sorted */
1234       PetscInt imark = -1;
1235       if (v) {
1236         *v = v_p = mat->rowvalues;
1237         for (i=0; i<nzB; i++) {
1238           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1239           else break;
1240         }
1241         imark = i;
1242         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1243         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1244       }
1245       if (idx) {
1246         *idx = idx_p = mat->rowindices;
1247         if (imark > -1) {
1248           for (i=0; i<imark; i++) {
1249             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1250           }
1251         } else {
1252           for (i=0; i<nzB; i++) {
1253             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1254             else break;
1255           }
1256           imark = i;
1257         }
1258         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1259         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1260       }
1261     } else {
1262       if (idx) *idx = NULL;
1263       if (v)   *v   = NULL;
1264     }
1265   }
1266   *nz  = nztot;
1267   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1268   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1273 {
1274   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1275 
1276   PetscFunctionBegin;
1277   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1278   baij->getrowactive = PETSC_FALSE;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1283 {
1284   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1285   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1286 
1287   PetscFunctionBegin;
1288   aA->getrow_utriangular = PETSC_TRUE;
1289   PetscFunctionReturn(0);
1290 }
1291 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1292 {
1293   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1294   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1295 
1296   PetscFunctionBegin;
1297   aA->getrow_utriangular = PETSC_FALSE;
1298   PetscFunctionReturn(0);
1299 }
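
/*
   Editor's note (illustrative calling sequence, not part of the original source): for SBAIJ
   matrices MatGetRow() returns only the stored upper-triangular portion of each row and must be
   bracketed by the two routines above.  "row" must be owned by this process.

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscCall(MatGetRowUpperTriangular(mat));
     PetscCall(MatGetRow(mat,row,&ncols,&cols,&vals));
     ... use cols[] and vals[] ...
     PetscCall(MatRestoreRow(mat,row,&ncols,&cols,&vals));
     PetscCall(MatRestoreRowUpperTriangular(mat));
*/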
1300 
1301 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1302 {
1303   PetscFunctionBegin;
1304   if (PetscDefined(USE_COMPLEX)) {
1305     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1306 
1307     PetscCall(MatConjugate(a->A));
1308     PetscCall(MatConjugate(a->B));
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1316 
1317   PetscFunctionBegin;
1318   PetscCall(MatRealPart(a->A));
1319   PetscCall(MatRealPart(a->B));
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1324 {
1325   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1326 
1327   PetscFunctionBegin;
1328   PetscCall(MatImaginaryPart(a->A));
1329   PetscCall(MatImaginaryPart(a->B));
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1334    Input: isrow       - distributed (parallel),
1335           iscol_local - locally owned (seq)
1336 */
1337 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1338 {
1339   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1340   const PetscInt *ptr1,*ptr2;
1341 
1342   PetscFunctionBegin;
1343   PetscCall(ISGetLocalSize(isrow,&sz1));
1344   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1345   if (sz1 > sz2) {
1346     *flg = PETSC_FALSE;
1347     PetscFunctionReturn(0);
1348   }
1349 
1350   PetscCall(ISGetIndices(isrow,&ptr1));
1351   PetscCall(ISGetIndices(iscol_local,&ptr2));
1352 
1353   PetscCall(PetscMalloc1(sz1,&a1));
1354   PetscCall(PetscMalloc1(sz2,&a2));
1355   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1356   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1357   PetscCall(PetscSortInt(sz1,a1));
1358   PetscCall(PetscSortInt(sz2,a2));
1359 
1360   nmatch=0;
1361   k     = 0;
1362   for (i=0; i<sz1; i++) {
1363     for (j=k; j<sz2; j++) {
1364       if (a1[i] == a2[j]) {
1365         k = j; nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow,&ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1383 {
1384   IS             iscol_local;
1385   PetscInt       csize;
1386   PetscBool      isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol,&csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1392     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol,&iscol_local));
1397     PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422 {
1423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1424   Mat            A  = a->A,B = a->B;
1425   PetscLogDouble isend[5],irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1431 
1432   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1433   isend[3] = info->memory;  isend[4] = info->mallocs;
1434 
1435   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1436 
1437   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1438   isend[3] += info->memory;  isend[4] += info->mallocs;
1439   if (flag == MAT_LOCAL) {
1440     info->nz_used      = isend[0];
1441     info->nz_allocated = isend[1];
1442     info->nz_unneeded  = isend[2];
1443     info->memory       = isend[3];
1444     info->mallocs      = isend[4];
1445   } else if (flag == MAT_GLOBAL_MAX) {
1446     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else if (flag == MAT_GLOBAL_SUM) {
1454     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1455 
1456     info->nz_used      = irecv[0];
1457     info->nz_allocated = irecv[1];
1458     info->nz_unneeded  = irecv[2];
1459     info->memory       = irecv[3];
1460     info->mallocs      = irecv[4];
1461   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1462   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463   info->fill_ratio_needed = 0;
1464   info->factor_mallocs    = 0;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1469 {
1470   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1471   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1472 
1473   PetscFunctionBegin;
1474   switch (op) {
1475   case MAT_NEW_NONZERO_LOCATIONS:
1476   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1477   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478   case MAT_KEEP_NONZERO_PATTERN:
1479   case MAT_SUBMAT_SINGLEIS:
1480   case MAT_NEW_NONZERO_LOCATION_ERR:
1481     MatCheckPreallocated(A,1);
1482     PetscCall(MatSetOption(a->A,op,flg));
1483     PetscCall(MatSetOption(a->B,op,flg));
1484     break;
1485   case MAT_ROW_ORIENTED:
1486     MatCheckPreallocated(A,1);
1487     a->roworiented = flg;
1488 
1489     PetscCall(MatSetOption(a->A,op,flg));
1490     PetscCall(MatSetOption(a->B,op,flg));
1491     break;
1492   case MAT_FORCE_DIAGONAL_ENTRIES:
1493   case MAT_SORTED_FULL:
1494     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1495     break;
1496   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497     a->donotstash = flg;
1498     break;
1499   case MAT_USE_HASH_TABLE:
1500     a->ht_flag = flg;
1501     break;
1502   case MAT_HERMITIAN:
1503     MatCheckPreallocated(A,1);
1504     PetscCall(MatSetOption(a->A,op,flg));
1505 #if defined(PETSC_USE_COMPLEX)
1506     if (flg) { /* need different mat-vec ops */
1507       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509       A->ops->multtranspose    = NULL;
1510       A->ops->multtransposeadd = NULL;
1511       A->symmetric = PETSC_FALSE;
1512     }
1513 #endif
1514     break;
1515   case MAT_SPD:
1516   case MAT_SYMMETRIC:
1517     MatCheckPreallocated(A,1);
1518     PetscCall(MatSetOption(a->A,op,flg));
1519 #if defined(PETSC_USE_COMPLEX)
1520     if (flg) { /* restore to use default mat-vec ops */
1521       A->ops->mult             = MatMult_MPISBAIJ;
1522       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525     }
1526 #endif
1527     break;
1528   case MAT_STRUCTURALLY_SYMMETRIC:
1529     MatCheckPreallocated(A,1);
1530     PetscCall(MatSetOption(a->A,op,flg));
1531     break;
1532   case MAT_SYMMETRY_ETERNAL:
1533     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1534     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1535     break;
1536   case MAT_IGNORE_LOWER_TRIANGULAR:
1537     aA->ignore_ltriangular = flg;
1538     break;
1539   case MAT_ERROR_LOWER_TRIANGULAR:
1540     aA->ignore_ltriangular = flg;
1541     break;
1542   case MAT_GETROW_UPPERTRIANGULAR:
1543     aA->getrow_utriangular = flg;
1544     break;
1545   default:
1546     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1547   }
1548   PetscFunctionReturn(0);
1549 }
1550 
1551 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1552 {
1553   PetscFunctionBegin;
1554   if (reuse == MAT_INITIAL_MATRIX) {
1555     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1556   }  else if (reuse == MAT_REUSE_MATRIX) {
1557     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1563 {
1564   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1565   Mat            a     = baij->A, b=baij->B;
1566   PetscInt       nv,m,n;
1567   PetscBool      flg;
1568 
1569   PetscFunctionBegin;
1570   if (ll != rr) {
1571     PetscCall(VecEqual(ll,rr,&flg));
1572     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
1573   }
1574   if (!ll) PetscFunctionReturn(0);
1575 
1576   PetscCall(MatGetLocalSize(mat,&m,&n));
1577   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n);
1578 
1579   PetscCall(VecGetLocalSize(rr,&nv));
1580   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1581 
1582   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1583 
1584   /* left diagonalscale the off-diagonal part */
1585   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1586 
1587   /* scale the diagonal part */
1588   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1589 
1590   /* right diagonalscale the off-diagonal part */
1591   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1592   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1597 {
1598   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1599 
1600   PetscFunctionBegin;
1601   PetscCall(MatSetUnfactored(a->A));
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1606 
1607 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1608 {
1609   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1610   Mat            a,b,c,d;
1611   PetscBool      flg;
1612 
1613   PetscFunctionBegin;
1614   a = matA->A; b = matA->B;
1615   c = matB->A; d = matB->B;
1616 
1617   PetscCall(MatEqual(a,c,&flg));
1618   if (flg) {
1619     PetscCall(MatEqual(b,d,&flg));
1620   }
1621   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1626 {
1627   PetscBool      isbaij;
1628 
1629   PetscFunctionBegin;
1630   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1631   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1632   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1633   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1634     PetscCall(MatGetRowUpperTriangular(A));
1635     PetscCall(MatCopy_Basic(A,B,str));
1636     PetscCall(MatRestoreRowUpperTriangular(A));
1637   } else {
1638     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1639     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1640 
1641     PetscCall(MatCopy(a->A,b->A,str));
1642     PetscCall(MatCopy(a->B,b->B,str));
1643   }
1644   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1649 {
1650   PetscFunctionBegin;
1651   PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1656 {
1657   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1658   PetscBLASInt   bnz,one=1;
1659   Mat_SeqSBAIJ   *xa,*ya;
1660   Mat_SeqBAIJ    *xb,*yb;
1661 
1662   PetscFunctionBegin;
1663   if (str == SAME_NONZERO_PATTERN) {
1664     PetscScalar alpha = a;
1665     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1666     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1667     PetscCall(PetscBLASIntCast(xa->nz,&bnz));
1668     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1669     xb   = (Mat_SeqBAIJ*)xx->B->data;
1670     yb   = (Mat_SeqBAIJ*)yy->B->data;
1671     PetscCall(PetscBLASIntCast(xb->nz,&bnz));
1672     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1673     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1674   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzero pattern of X is a subset of Y's */
1675     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
1676     PetscCall(MatAXPY_Basic(Y,a,X,str));
1677     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
1678   } else {
1679     Mat      B;
1680     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1681     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1682     PetscCall(MatGetRowUpperTriangular(X));
1683     PetscCall(MatGetRowUpperTriangular(Y));
1684     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1685     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1686     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1687     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1688     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1689     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1690     PetscCall(MatSetType(B,MATMPISBAIJ));
1691     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1692     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1693     PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1694     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1695     PetscCall(MatHeaderMerge(Y,&B));
1696     PetscCall(PetscFree(nnz_d));
1697     PetscCall(PetscFree(nnz_o));
1698     PetscCall(MatRestoreRowUpperTriangular(X));
1699     PetscCall(MatRestoreRowUpperTriangular(Y));
1700   }
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1705 {
1706   PetscInt       i;
1707   PetscBool      flg;
1708 
1709   PetscFunctionBegin;
1710   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1711   for (i=0; i<n; i++) {
1712     PetscCall(ISEqual(irow[i],icol[i],&flg));
1713     if (!flg) {
1714       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1715     }
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1721 {
1722   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1723   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1724 
1725   PetscFunctionBegin;
1726   if (!Y->preallocated) {
1727     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1728   } else if (!aij->nz) {
1729     PetscInt nonew = aij->nonew;
1730     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1731     aij->nonew = nonew;
1732   }
1733   PetscCall(MatShift_Basic(Y,a));
1734   PetscFunctionReturn(0);
1735 }
1736 
1737 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1738 {
1739   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1740 
1741   PetscFunctionBegin;
1742   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1743   PetscCall(MatMissingDiagonal(a->A,missing,d));
1744   if (d) {
1745     PetscInt rstart;
1746     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1747     *d += rstart/A->rmap->bs;
1748 
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1754 {
1755   PetscFunctionBegin;
1756   *a = ((Mat_MPISBAIJ*)A->data)->A;
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 /* -------------------------------------------------------------------*/
1761 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1762                                        MatGetRow_MPISBAIJ,
1763                                        MatRestoreRow_MPISBAIJ,
1764                                        MatMult_MPISBAIJ,
1765                                /*  4*/ MatMultAdd_MPISBAIJ,
1766                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1767                                        MatMultAdd_MPISBAIJ,
1768                                        NULL,
1769                                        NULL,
1770                                        NULL,
1771                                /* 10*/ NULL,
1772                                        NULL,
1773                                        NULL,
1774                                        MatSOR_MPISBAIJ,
1775                                        MatTranspose_MPISBAIJ,
1776                                /* 15*/ MatGetInfo_MPISBAIJ,
1777                                        MatEqual_MPISBAIJ,
1778                                        MatGetDiagonal_MPISBAIJ,
1779                                        MatDiagonalScale_MPISBAIJ,
1780                                        MatNorm_MPISBAIJ,
1781                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1782                                        MatAssemblyEnd_MPISBAIJ,
1783                                        MatSetOption_MPISBAIJ,
1784                                        MatZeroEntries_MPISBAIJ,
1785                                /* 24*/ NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                        NULL,
1790                                /* 29*/ MatSetUp_MPISBAIJ,
1791                                        NULL,
1792                                        NULL,
1793                                        MatGetDiagonalBlock_MPISBAIJ,
1794                                        NULL,
1795                                /* 34*/ MatDuplicate_MPISBAIJ,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                        NULL,
1800                                /* 39*/ MatAXPY_MPISBAIJ,
1801                                        MatCreateSubMatrices_MPISBAIJ,
1802                                        MatIncreaseOverlap_MPISBAIJ,
1803                                        MatGetValues_MPISBAIJ,
1804                                        MatCopy_MPISBAIJ,
1805                                /* 44*/ NULL,
1806                                        MatScale_MPISBAIJ,
1807                                        MatShift_MPISBAIJ,
1808                                        NULL,
1809                                        NULL,
1810                                /* 49*/ NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                        NULL,
1815                                /* 54*/ NULL,
1816                                        NULL,
1817                                        MatSetUnfactored_MPISBAIJ,
1818                                        NULL,
1819                                        MatSetValuesBlocked_MPISBAIJ,
1820                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                        NULL,
1825                                /* 64*/ NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                        NULL,
1830                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1831                                        NULL,
1832                                        MatConvert_MPISBAIJ_Basic,
1833                                        NULL,
1834                                        NULL,
1835                                /* 74*/ NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                        NULL,
1840                                /* 79*/ NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                        MatLoad_MPISBAIJ,
1845                                /* 84*/ NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                        NULL,
1850                                /* 89*/ NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                        NULL,
1855                                /* 94*/ NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                        NULL,
1860                                /* 99*/ NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        MatConjugate_MPISBAIJ,
1864                                        NULL,
1865                                /*104*/ NULL,
1866                                        MatRealPart_MPISBAIJ,
1867                                        MatImaginaryPart_MPISBAIJ,
1868                                        MatGetRowUpperTriangular_MPISBAIJ,
1869                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1870                                /*109*/ NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        NULL,
1874                                        MatMissingDiagonal_MPISBAIJ,
1875                                /*114*/ NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                        NULL,
1880                                /*119*/ NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                        NULL,
1885                                /*124*/ NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                        NULL,
1890                                /*129*/ NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                        NULL,
1895                                /*134*/ NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                        NULL,
1900                                /*139*/ MatSetBlockSizes_Default,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                        NULL,
1905                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL
1909 };
1910 
1911 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1912 {
1913   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1914   PetscInt       i,mbs,Mbs;
1915   PetscMPIInt    size;
1916 
1917   PetscFunctionBegin;
1918   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1919   PetscCall(PetscLayoutSetUp(B->rmap));
1920   PetscCall(PetscLayoutSetUp(B->cmap));
1921   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1922   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1923   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1924 
1925   mbs = B->rmap->n/bs;
1926   Mbs = B->rmap->N/bs;
1927   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1928 
1929   B->rmap->bs = bs;
1930   b->bs2      = bs*bs;
1931   b->mbs      = mbs;
1932   b->Mbs      = Mbs;
1933   b->nbs      = B->cmap->n/bs;
1934   b->Nbs      = B->cmap->N/bs;
1935 
1936   for (i=0; i<=b->size; i++) {
1937     b->rangebs[i] = B->rmap->range[i]/bs;
1938   }
1939   b->rstartbs = B->rmap->rstart/bs;
1940   b->rendbs   = B->rmap->rend/bs;
1941 
1942   b->cstartbs = B->cmap->rstart/bs;
1943   b->cendbs   = B->cmap->rend/bs;
1944 
1945 #if defined(PETSC_USE_CTABLE)
1946   PetscCall(PetscTableDestroy(&b->colmap));
1947 #else
1948   PetscCall(PetscFree(b->colmap));
1949 #endif
1950   PetscCall(PetscFree(b->garray));
1951   PetscCall(VecDestroy(&b->lvec));
1952   PetscCall(VecScatterDestroy(&b->Mvctx));
1953   PetscCall(VecDestroy(&b->slvec0));
1954   PetscCall(VecDestroy(&b->slvec0b));
1955   PetscCall(VecDestroy(&b->slvec1));
1956   PetscCall(VecDestroy(&b->slvec1a));
1957   PetscCall(VecDestroy(&b->slvec1b));
1958   PetscCall(VecScatterDestroy(&b->sMvctx));
1959 
1960   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1961   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1962   PetscCall(MatDestroy(&b->B));
1963   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1964   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1965   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1966   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1967 
1968   if (!B->preallocated) {
1969     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1970     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1971     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1972     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1973     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1974   }
1975 
1976   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1977   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1978 
1979   B->preallocated  = PETSC_TRUE;
1980   B->was_assembled = PETSC_FALSE;
1981   B->assembled     = PETSC_FALSE;
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1986 {
1987   PetscInt       m,rstart,cend;
1988   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1989   const PetscInt *JJ    =NULL;
1990   PetscScalar    *values=NULL;
1991   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1992   PetscBool      nooffprocentries;
1993 
1994   PetscFunctionBegin;
1995   PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
1996   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
1997   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
1998   PetscCall(PetscLayoutSetUp(B->rmap));
1999   PetscCall(PetscLayoutSetUp(B->cmap));
2000   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2001   m      = B->rmap->n/bs;
2002   rstart = B->rmap->rstart/bs;
2003   cend   = B->cmap->rend/bs;
2004 
2005   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2006   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2007   for (i=0; i<m; i++) {
2008     nz = ii[i+1] - ii[i];
2009     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2010     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2011     JJ     = jj + ii[i];
2012     bd     = 0;
2013     for (j=0; j<nz; j++) {
2014       if (*JJ >= i + rstart) break;
2015       JJ++;
2016       bd++;
2017     }
2018     d  = 0;
2019     for (; j<nz; j++) {
2020       if (*JJ++ >= cend) break;
2021       d++;
2022     }
2023     d_nnz[i] = d;
2024     o_nnz[i] = nz - d - bd;
2025     nz       = nz - bd;
2026     nz_max = PetscMax(nz_max,nz);
2027   }
2028   PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2029   PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2030   PetscCall(PetscFree2(d_nnz,o_nnz));
2031 
2032   values = (PetscScalar*)V;
2033   if (!values) {
2034     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2035   }
2036   for (i=0; i<m; i++) {
2037     PetscInt          row    = i + rstart;
2038     PetscInt          ncols  = ii[i+1] - ii[i];
2039     const PetscInt    *icols = jj + ii[i];
2040     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2041       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2042       PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2043     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2044       PetscInt j;
2045       for (j=0; j<ncols; j++) {
2046         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2047         PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2048       }
2049     }
2050   }
2051 
2052   if (!V) PetscCall(PetscFree(values));
2053   nooffprocentries    = B->nooffprocentries;
2054   B->nooffprocentries = PETSC_TRUE;
2055   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2056   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2057   B->nooffprocentries = nooffprocentries;
2058 
2059   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 /*MC
2064    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2065    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2066    the matrix is stored.
2067 
2068    For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2069    can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE).
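
   A minimal sketch of this (assuming comm, the sizes m, n, M, N, the block size bs, and the per-row estimates d_nz
   and o_nz are already chosen, and that values are inserted and assembled afterwards) might look like
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, m, n, M, N);
   MatSetType(A, MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A, bs, d_nz, NULL, o_nz, NULL);
   MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE);
.ve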
2070 
2071    Options Database Keys:
2072 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2073 
2074    Notes:
2075      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2076      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2077 
2078    Level: beginner
2079 
2080 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2081 M*/
2082 
2083 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2084 {
2085   Mat_MPISBAIJ   *b;
2086   PetscBool      flg = PETSC_FALSE;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(PetscNewLog(B,&b));
2090   B->data = (void*)b;
2091   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2092 
2093   B->ops->destroy = MatDestroy_MPISBAIJ;
2094   B->ops->view    = MatView_MPISBAIJ;
2095   B->assembled    = PETSC_FALSE;
2096   B->insertmode   = NOT_SET_VALUES;
2097 
2098   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2099   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2100 
2101   /* build local table of row and column ownerships */
2102   PetscCall(PetscMalloc1(b->size+2,&b->rangebs));
2103 
2104   /* build cache for off-processor entries formed during assembly */
2105   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2106 
2107   b->donotstash  = PETSC_FALSE;
2108   b->colmap      = NULL;
2109   b->garray      = NULL;
2110   b->roworiented = PETSC_TRUE;
2111 
2112   /* stuff used in block assembly */
2113   b->barray = NULL;
2114 
2115   /* stuff used for matrix vector multiply */
2116   b->lvec    = NULL;
2117   b->Mvctx   = NULL;
2118   b->slvec0  = NULL;
2119   b->slvec0b = NULL;
2120   b->slvec1  = NULL;
2121   b->slvec1a = NULL;
2122   b->slvec1b = NULL;
2123   b->sMvctx  = NULL;
2124 
2125   /* stuff for MatGetRow() */
2126   b->rowindices   = NULL;
2127   b->rowvalues    = NULL;
2128   b->getrowactive = PETSC_FALSE;
2129 
2130   /* hash table stuff */
2131   b->ht           = NULL;
2132   b->hd           = NULL;
2133   b->ht_size      = 0;
2134   b->ht_flag      = PETSC_FALSE;
2135   b->ht_fact      = 0;
2136   b->ht_total_ct  = 0;
2137   b->ht_insert_ct = 0;
2138 
2139   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2140   b->ijonly = PETSC_FALSE;
2141 
2142   b->in_loc = NULL;
2143   b->v_loc  = NULL;
2144   b->n_loc  = 0;
2145 
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
2149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2150 #if defined(PETSC_HAVE_ELEMENTAL)
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
2152 #endif
2153 #if defined(PETSC_HAVE_SCALAPACK)
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
2155 #endif
2156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
2157   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));
2158 
2159   B->symmetric                  = PETSC_TRUE;
2160   B->structurally_symmetric     = PETSC_TRUE;
2161   B->symmetric_set              = PETSC_TRUE;
2162   B->structurally_symmetric_set = PETSC_TRUE;
2163   B->symmetric_eternal          = PETSC_TRUE;
2164 #if defined(PETSC_USE_COMPLEX)
2165   B->hermitian                  = PETSC_FALSE;
2166   B->hermitian_set              = PETSC_FALSE;
2167 #else
2168   B->hermitian                  = PETSC_TRUE;
2169   B->hermitian_set              = PETSC_TRUE;
2170 #endif
2171 
2172   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
2173   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2174   PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
2175   if (flg) {
2176     PetscReal fact = 1.39;
2177     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2178     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2179     if (fact <= 1.0) fact = 1.39;
2180     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2181     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2182   }
2183   PetscOptionsEnd();
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 /*MC
2188    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2189 
2190    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2191    and MATMPISBAIJ otherwise.
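
   For example, a minimal sketch (assuming comm is any MPI communicator and M is the global size of a square matrix) is
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M, M);
   MatSetType(A, MATSBAIJ);
   MatSetFromOptions(A);
.ve
   which yields a MATSEQSBAIJ matrix when comm has one process and a MATMPISBAIJ matrix otherwise.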
2192 
2193    Options Database Keys:
2194 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2195 
2196   Level: beginner
2197 
2198 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2199 M*/
2200 
2201 /*@C
2202    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2203    the user should preallocate the matrix storage by setting the parameters
2204    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2205    performance can be increased by more than a factor of 50.
2206 
2207    Collective on Mat
2208 
2209    Input Parameters:
2210 +  B - the matrix
2211 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2212           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2213 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2214            submatrix  (same for all local rows)
2215 .  d_nnz - array containing the number of block nonzeros in the various block rows
2216            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2217            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2218            for the diagonal entry and set a value even if it is zero.
2219 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2220            submatrix (same for all local rows).
2221 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2222            off-diagonal portion of the local submatrix that is right of the diagonal
2223            (possibly different for each block row) or NULL.
2224 
2225    Options Database Keys:
2226 +   -mat_no_unroll - uses code that does not unroll the loops in the
2227                      block calculations (much slower)
2228 -   -mat_block_size - size of the blocks to use
2229 
2230    Notes:
2231 
2232    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2233    then it must be used on all processors that share the object for that argument.
2234 
2235    If the *_nnz parameter is given then the *_nz parameter is ignored
2236 
2237    Storage Information:
2238    For a square global matrix we define each processor's diagonal portion
2239    to be its local rows and the corresponding columns (a square submatrix);
2240    each processor's off-diagonal portion encompasses the remainder of the
2241    local matrix (a rectangular submatrix).
2242 
2243    The user can specify preallocated storage for the diagonal part of
2244    the local submatrix with either d_nz or d_nnz (not both).  Set
2245    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2246    memory allocation.  Likewise, specify preallocated storage for the
2247    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2248 
2249    You can call MatGetInfo() to get information on how effective the preallocation was;
2250    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2251    You can also run with the option -info and look for messages with the string
2252    malloc in them to see if additional memory allocation was needed.
2253 
2254    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2255    the figure below we depict these three local rows and all columns (0-11).
2256 
2257 .vb
2258            0 1 2 3 4 5 6 7 8 9 10 11
2259           --------------------------
2260    row 3  |. . . d d d o o o o  o  o
2261    row 4  |. . . d d d o o o o  o  o
2262    row 5  |. . . d d d o o o o  o  o
2263           --------------------------
2264 .ve
2265 
2266    Thus, any entries in the d locations are stored in the d (diagonal)
2267    submatrix, and any entries in the o locations are stored in the
2268    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2269    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2270 
2271    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2272    plus the diagonal part of the d matrix,
2273    and o_nz should indicate the number of block nonzeros per row in the o matrix
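
   As a minimal sketch (assuming bs is 1 and every d and o location shown above holds a nonzero), the process owning
   rows 3, 4 and 5 could preallocate its slice with
.vb
   PetscInt d_nnz[3] = {3, 2, 1};
   PetscInt o_nnz[3] = {6, 6, 6};
   MatMPISBAIJSetPreallocation(A, 1, 0, d_nnz, 0, o_nnz);
.ve
   or, less precisely, with d_nz = 3, o_nz = 6 and NULL for both arrays.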
2274 
2275    In general, for PDE problems in which most nonzeros are near the diagonal,
2276    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2277    or you will get TERRIBLE performance; see the users' manual chapter on
2278    matrices.
2279 
2280    Level: intermediate
2281 
2282 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2283 @*/
2284 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2285 {
2286   PetscFunctionBegin;
2287   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2288   PetscValidType(B,1);
2289   PetscValidLogicalCollectiveInt(B,bs,2);
2290   PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*@C
2295    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2296    (block compressed row).  For good matrix assembly performance
2297    the user should preallocate the matrix storage by setting the parameters
2298    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2299    performance can be increased by more than a factor of 50.
2300 
2301    Collective
2302 
2303    Input Parameters:
2304 +  comm - MPI communicator
2305 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2306           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2307 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2308            This value should be the same as the local size used in creating the
2309            y vector for the matrix-vector product y = Ax.
2310 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2311            This value should be the same as the local size used in creating the
2312            x vector for the matrix-vector product y = Ax.
2313 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2314 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2315 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2316            submatrix  (same for all local rows)
2317 .  d_nnz - array containing the number of block nonzeros in the various block rows
2318            in the upper triangular portion of the diagonal portion of the local submatrix
2319            (possibly different for each block row) or NULL.
2320            If you plan to factor the matrix you must leave room for the diagonal entry and
2321            set its value even if it is zero.
2322 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2323            submatrix (same for all local rows).
2324 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2325            off-diagonal portion of the local submatrix (possibly different for
2326            each block row) or NULL.
2327 
2328    Output Parameter:
2329 .  A - the matrix
2330 
2331    Options Database Keys:
2332 +   -mat_no_unroll - uses code that does not unroll the loops in the
2333                      block calculations (much slower)
2334 .   -mat_block_size - size of the blocks to use
2335 -   -mat_mpi - use the parallel matrix data structures even on one processor
2336                (defaults to using SeqSBAIJ format on one processor)
2337 
2338    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2339    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2340    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
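
   A minimal sketch of that recommended sequence (assuming comm, bs, the local sizes m and n, and the preallocation
   arrays d_nnz and o_nnz are already defined) is
.vb
   Mat A;
   MatCreate(comm, &A);
   MatSetSizes(A, m, n, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATSBAIJ);
   MatSetFromOptions(A);
   MatMPISBAIJSetPreallocation(A, bs, 0, d_nnz, 0, o_nnz);
   MatSeqSBAIJSetPreallocation(A, bs, 0, d_nnz);
.ve
   The preallocation calls are safe to issue unconditionally because each one only takes effect when the matrix has
   the matching type.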
2341 
2342    Notes:
2343    The number of rows and columns must be divisible by blocksize.
2344    This matrix type does not support complex Hermitian operations.
2345 
2346    The user MUST specify either the local or global matrix dimensions
2347    (possibly both).
2348 
2349    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2350    then it must be used on all processors that share the object for that argument.
2351 
2352    If the *_nnz parameter is given then the *_nz parameter is ignored
2353 
2354    Storage Information:
2355    For a square global matrix we define each processor's diagonal portion
2356    to be its local rows and the corresponding columns (a square submatrix);
2357    each processor's off-diagonal portion encompasses the remainder of the
2358    local matrix (a rectangular submatrix).
2359 
2360    The user can specify preallocated storage for the diagonal part of
2361    the local submatrix with either d_nz or d_nnz (not both).  Set
2362    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2363    memory allocation.  Likewise, specify preallocated storage for the
2364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2365 
2366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2367    the figure below we depict these three local rows and all columns (0-11).
2368 
2369 .vb
2370            0 1 2 3 4 5 6 7 8 9 10 11
2371           --------------------------
2372    row 3  |. . . d d d o o o o  o  o
2373    row 4  |. . . d d d o o o o  o  o
2374    row 5  |. . . d d d o o o o  o  o
2375           --------------------------
2376 .ve
2377 
2378    Thus, any entries in the d locations are stored in the d (diagonal)
2379    submatrix, and any entries in the o locations are stored in the
2380    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2381    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2382 
2383    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2384    plus the diagonal part of the d matrix,
2385    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2386    In general, for PDE problems in which most nonzeros are near the diagonal,
2387    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2388    or you will get TERRIBLE performance; see the users' manual chapter on
2389    matrices.
2390 
2391    Level: intermediate
2392 
2393 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2394 @*/
2395 
2396 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2397 {
2398   PetscMPIInt    size;
2399 
2400   PetscFunctionBegin;
2401   PetscCall(MatCreate(comm,A));
2402   PetscCall(MatSetSizes(*A,m,n,M,N));
2403   PetscCallMPI(MPI_Comm_size(comm,&size));
2404   if (size > 1) {
2405     PetscCall(MatSetType(*A,MATMPISBAIJ));
2406     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2407   } else {
2408     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2409     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2410   }
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2415 {
2416   Mat            mat;
2417   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2418   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2419   PetscScalar    *array;
2420 
2421   PetscFunctionBegin;
2422   *newmat = NULL;
2423 
2424   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2425   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2426   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2427   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2428   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2429 
2430   mat->factortype   = matin->factortype;
2431   mat->preallocated = PETSC_TRUE;
2432   mat->assembled    = PETSC_TRUE;
2433   mat->insertmode   = NOT_SET_VALUES;
2434 
2435   a      = (Mat_MPISBAIJ*)mat->data;
2436   a->bs2 = oldmat->bs2;
2437   a->mbs = oldmat->mbs;
2438   a->nbs = oldmat->nbs;
2439   a->Mbs = oldmat->Mbs;
2440   a->Nbs = oldmat->Nbs;
2441 
2442   a->size         = oldmat->size;
2443   a->rank         = oldmat->rank;
2444   a->donotstash   = oldmat->donotstash;
2445   a->roworiented  = oldmat->roworiented;
2446   a->rowindices   = NULL;
2447   a->rowvalues    = NULL;
2448   a->getrowactive = PETSC_FALSE;
2449   a->barray       = NULL;
2450   a->rstartbs     = oldmat->rstartbs;
2451   a->rendbs       = oldmat->rendbs;
2452   a->cstartbs     = oldmat->cstartbs;
2453   a->cendbs       = oldmat->cendbs;
2454 
2455   /* hash table stuff */
2456   a->ht           = NULL;
2457   a->hd           = NULL;
2458   a->ht_size      = 0;
2459   a->ht_flag      = oldmat->ht_flag;
2460   a->ht_fact      = oldmat->ht_fact;
2461   a->ht_total_ct  = 0;
2462   a->ht_insert_ct = 0;
2463 
2464   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
2465   if (oldmat->colmap) {
2466 #if defined(PETSC_USE_CTABLE)
2467     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2468 #else
2469     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
2470     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
2471     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
2472 #endif
2473   } else a->colmap = NULL;
2474 
2475   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2476     PetscCall(PetscMalloc1(len,&a->garray));
2477     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2478     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2479   } else a->garray = NULL;
2480 
2481   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
2482   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2483   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2484   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2485   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2486 
2487   PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
2488   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2489   PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
2490   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2491 
2492   PetscCall(VecGetLocalSize(a->slvec1,&nt));
2493   PetscCall(VecGetArray(a->slvec1,&array));
2494   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
2495   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
2496   PetscCall(VecRestoreArray(a->slvec1,&array));
2497   PetscCall(VecGetArray(a->slvec0,&array));
2498   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
2499   PetscCall(VecRestoreArray(a->slvec0,&array));
2500   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2501   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2502   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
2503   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
2504   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));
2505 
2506   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so use the lazy trick of referencing the old scatter: */
2507   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2508   a->sMvctx = oldmat->sMvctx;
2509   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));
2510 
2511   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2512   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2513   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2514   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2515   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2516   *newmat = mat;
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2521 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2522 
2523 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2524 {
2525   PetscBool      isbinary;
2526 
2527   PetscFunctionBegin;
2528   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2529   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2530   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 /*XXXXX@
2535    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2536 
2537    Input Parameters:
2538 .  mat  - the matrix
2539 .  fact - factor
2540 
2541    Not Collective on Mat, each process can have a different hash factor
2542 
2543    Level: advanced
2544 
2545   Notes:
2546    This can also be set by the command line option: -mat_use_hash_table fact
2547 
2548 .seealso: `MatSetOption()`
2549 @XXXXX*/
2550 
2551 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2552 {
2553   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2554   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2555   PetscReal      atmp;
2556   PetscReal      *work,*svalues,*rvalues;
2557   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2558   PetscMPIInt    rank,size;
2559   PetscInt       *rowners_bs,dest,count,source;
2560   PetscScalar    *va;
2561   MatScalar      *ba;
2562   MPI_Status     stat;
2563 
2564   PetscFunctionBegin;
2565   PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2566   PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
2567   PetscCall(VecGetArray(v,&va));
2568 
2569   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2570   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2571 
2572   bs  = A->rmap->bs;
2573   mbs = a->mbs;
2574   Mbs = a->Mbs;
2575   ba  = b->a;
2576   bi  = b->i;
2577   bj  = b->j;
2578 
2579   /* find ownerships */
2580   rowners_bs = A->rmap->range;
2581 
2582   /* each proc creates an array to be distributed */
2583   PetscCall(PetscCalloc1(bs*Mbs,&work));
2584 
2585   /* row_max for B */
2586   if (rank != size-1) {
2587     for (i=0; i<mbs; i++) {
2588       ncols = bi[1] - bi[0]; bi++;
2589       brow  = bs*i;
2590       for (j=0; j<ncols; j++) {
2591         bcol = bs*(*bj);
2592         for (kcol=0; kcol<bs; kcol++) {
2593           col  = bcol + kcol;                /* local col index */
2594           col += rowners_bs[rank+1];      /* global col index */
2595           for (krow=0; krow<bs; krow++) {
2596             atmp = PetscAbsScalar(*ba); ba++;
2597             row  = brow + krow;   /* local row index */
2598             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2599             if (work[col] < atmp) work[col] = atmp;
2600           }
2601         }
2602         bj++;
2603       }
2604     }
2605 
2606     /* send values to its owners */
2607     for (dest=rank+1; dest<size; dest++) {
2608       svalues = work + rowners_bs[dest];
2609       count   = rowners_bs[dest+1]-rowners_bs[dest];
2610       PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
2611     }
2612   }
2613 
2614   /* receive values */
2615   if (rank) {
2616     rvalues = work;
2617     count   = rowners_bs[rank+1]-rowners_bs[rank];
2618     for (source=0; source<rank; source++) {
2619       PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
2620       /* process values */
2621       for (i=0; i<count; i++) {
2622         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2623       }
2624     }
2625   }
2626 
2627   PetscCall(VecRestoreArray(v,&va));
2628   PetscCall(PetscFree(work));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2633 {
2634   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2635   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2636   PetscScalar       *x,*ptr,*from;
2637   Vec               bb1;
2638   const PetscScalar *b;
2639 
2640   PetscFunctionBegin;
2641   PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2642   PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2643 
2644   if (flag == SOR_APPLY_UPPER) {
2645     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2646     PetscFunctionReturn(0);
2647   }
2648 
2649   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2650     if (flag & SOR_ZERO_INITIAL_GUESS) {
2651       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
2652       its--;
2653     }
2654 
2655     PetscCall(VecDuplicate(bb,&bb1));
2656     while (its--) {
2657 
2658       /* lower triangular part: slvec0b = - B^T*xx */
2659       PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2660 
2661       /* copy xx into slvec0a */
2662       PetscCall(VecGetArray(mat->slvec0,&ptr));
2663       PetscCall(VecGetArray(xx,&x));
2664       PetscCall(PetscArraycpy(ptr,x,bs*mbs));
2665       PetscCall(VecRestoreArray(mat->slvec0,&ptr));
2666 
2667       PetscCall(VecScale(mat->slvec0,-1.0));
2668 
2669       /* copy bb into slvec1a */
2670       PetscCall(VecGetArray(mat->slvec1,&ptr));
2671       PetscCall(VecGetArrayRead(bb,&b));
2672       PetscCall(PetscArraycpy(ptr,b,bs*mbs));
2673       PetscCall(VecRestoreArray(mat->slvec1,&ptr));
2674 
2675       /* set slvec1b = 0 */
2676       PetscCall(VecSet(mat->slvec1b,0.0));
2677 
2678       PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2679       PetscCall(VecRestoreArray(xx,&x));
2680       PetscCall(VecRestoreArrayRead(bb,&b));
2681       PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2682 
2683       /* upper triangular part: bb1 = bb1 - B*x */
2684       PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));
2685 
2686       /* local diagonal sweep */
2687       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
2688     }
2689     PetscCall(VecDestroy(&bb1));
2690   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2691     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2692   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2693     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2694   } else if (flag & SOR_EISENSTAT) {
2695     Vec               xx1;
2696     PetscBool         hasop;
2697     const PetscScalar *diag;
2698     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2699     PetscInt          i,n;
2700 
2701     if (!mat->xx1) {
2702       PetscCall(VecDuplicate(bb,&mat->xx1));
2703       PetscCall(VecDuplicate(bb,&mat->bb1));
2704     }
2705     xx1 = mat->xx1;
2706     bb1 = mat->bb1;
2707 
2708     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
2709 
2710     if (!mat->diag) {
2711       /* this is wrong for same matrix with new nonzero values */
2712       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
2713       PetscCall(MatGetDiagonal(matin,mat->diag));
2714     }
2715     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
2716 
2717     if (hasop) {
2718       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
2719       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2720     } else {
2721       /*
2722           These two lines are replaced by code that may be a bit faster for a good compiler
2723       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2724       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2725       */
2726       PetscCall(VecGetArray(mat->slvec1a,&sl));
2727       PetscCall(VecGetArrayRead(mat->diag,&diag));
2728       PetscCall(VecGetArrayRead(bb,&b));
2729       PetscCall(VecGetArray(xx,&x));
2730       PetscCall(VecGetLocalSize(xx,&n));
2731       if (omega == 1.0) {
2732         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2733         PetscCall(PetscLogFlops(2.0*n));
2734       } else {
2735         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2736         PetscCall(PetscLogFlops(3.0*n));
2737       }
2738       PetscCall(VecRestoreArray(mat->slvec1a,&sl));
2739       PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2740       PetscCall(VecRestoreArrayRead(bb,&b));
2741       PetscCall(VecRestoreArray(xx,&x));
2742     }
2743 
2744     /* multiply off-diagonal portion of matrix */
2745     PetscCall(VecSet(mat->slvec1b,0.0));
2746     PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2747     PetscCall(VecGetArray(mat->slvec0,&from));
2748     PetscCall(VecGetArray(xx,&x));
2749     PetscCall(PetscArraycpy(from,x,bs*mbs));
2750     PetscCall(VecRestoreArray(mat->slvec0,&from));
2751     PetscCall(VecRestoreArray(xx,&x));
2752     PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2753     PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2754     PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2755 
2756     /* local sweep */
2757     PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2758     PetscCall(VecAXPY(xx,1.0,xx1));
2759   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 /*@
2764      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2765          in standard CSR format.
2766 
2767    Collective
2768 
2769    Input Parameters:
2770 +  comm - MPI communicator
2771 .  bs - the block size, only a block size of 1 is supported
2772 .  m - number of local rows (cannot be PETSC_DECIDE)
2773 .  n - number of local columns; this value should be the same as the local size used in creating the
2774        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2775        calculated if N is given). For square matrices n is almost always m.
2776 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2777 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2778 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2779 .  j - column indices
2780 -  a - matrix values
2781 
2782    Output Parameter:
2783 .   mat - the matrix
2784 
2785    Level: intermediate
2786 
2787    Notes:
2788        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2789      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2790      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2791 
2792        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2793        The i and j indices are 0 based, and the i indices are offsets into the local j array.
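
   Example:
   An illustrative sketch (not taken from the PETSc examples; sizes and values are made up) for the rank that
   owns global rows 0 and 1 of a larger symmetric matrix. Each rank passes the CSR data for its own rows,
   keeping only the diagonal and upper triangular entries; column indices are global:
.vb
   PetscInt    i[] = {0,2,3};         /* row 0 has 2 entries, row 1 has 1 entry            */
   PetscInt    j[] = {0,1,1};         /* sorted global column indices, upper triangle only */
   PetscScalar a[] = {4.0,-1.0,4.0};  /* values corresponding to j[]                       */
   Mat         A;

   PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A));
.ve
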
2794 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2795           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2796 @*/
2797 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2798 {
2799   PetscFunctionBegin;
2800   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2801   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2802   PetscCall(MatCreate(comm,mat));
2803   PetscCall(MatSetSizes(*mat,m,n,M,N));
2804   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2805   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 /*@C
2810    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2811 
2812    Collective
2813 
2814    Input Parameters:
2815 +  B - the matrix
2816 .  bs - the block size
2817 .  i - the indices into j for the start of each local row (starts with zero)
2818 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2819 -  v - optional values in the matrix
2820 
2821    Level: advanced
2822 
2823    Notes:
2824    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2825    and usually the numerical values as well
2826 
2827    Any entries below the diagonal are ignored
2828 
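   Example:
   An illustrative sketch (block size and indices are made up), assuming the i and j arrays are given per block
   as in the BAIJ counterpart, for a rank that owns block rows 0 and 1. It assumes B has already been created
   with MatCreate(), MatSetSizes() (local sizes a multiple of bs = 2), and MatSetType() with MATMPISBAIJ; only
   the nonzero structure is supplied here (v = NULL):
.vb
   PetscInt i[] = {0,2,3};   /* block row 0 has 2 blocks, block row 1 has 1 block  */
   PetscInt j[] = {0,1,1};   /* sorted global block column indices, upper triangle */

   PetscCall(MatMPISBAIJSetPreallocationCSR(B,2,i,j,NULL));
.ve
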
2829 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2830 @*/
2831 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2832 {
2833   PetscFunctionBegin;
2834   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2835   PetscFunctionReturn(0);
2836 }
2837 
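/* Builds (MAT_INITIAL_MATRIX) or refills (MAT_REUSE_MATRIX) a parallel SBAIJ matrix on comm whose block rows
   are the sequential SBAIJ matrices inmat supplied by the ranks, stacked in rank order; n is the local column
   size of the result (or PETSC_DECIDE) */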
2838 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2839 {
2840   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2841   PetscInt       *indx;
2842   PetscScalar    *values;
2843 
2844   PetscFunctionBegin;
2845   PetscCall(MatGetSize(inmat,&m,&N));
2846   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2847     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2848     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2849     PetscInt       *bindx,rmax=a->rmax,j;
2850     PetscMPIInt    rank,size;
2851 
2852     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2853     mbs = m/bs; Nbs = N/cbs;
2854     if (n == PETSC_DECIDE) {
2855       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2856     }
2857     nbs = n/cbs;
2858 
2859     PetscCall(PetscMalloc1(rmax,&bindx));
2860     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2861 
2862     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2863     PetscCallMPI(MPI_Comm_size(comm,&size));
2864     if (rank == size-1) {
2865       /* Check sum(nbs) = Nbs */
2866       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2867     }
2868 
2869     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2870     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2871     for (i=0; i<mbs; i++) {
2872       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2873       nnz  = nnz/bs;
2874       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2875       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2876       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2877     }
2878     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2879     PetscCall(PetscFree(bindx));
2880 
2881     PetscCall(MatCreate(comm,outmat));
2882     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2883     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2884     PetscCall(MatSetType(*outmat,MATSBAIJ));
2885     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2886     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2887     MatPreallocateEnd(dnz,onz);
2888   }
2889 
2890   /* numeric phase */
2891   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2892   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2893 
2894   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2895   for (i=0; i<m; i++) {
2896     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2897     Ii   = i + rstart;
2898     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2899     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2900   }
2901   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2902   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2903   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2904   PetscFunctionReturn(0);
2905 }
2906