xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 9fe8556826ed0dc8a9fe15a3f2d3eda474dc9338)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
24   PetscValidLogicalCollectiveInt(B,nm,2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
27     PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
28     PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B,fill,5);
31   PetscCall(MatGetBlockSize(B,&bs));
32   PetscCall(MatGetSize(B,&M,&N));
33   PetscCall(MatGetLocalSize(B,&m,&n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
35   PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator,bs));
37   PetscCall(MatSetSizes(preallocator,m,n,M,N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt          ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
47       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
48       if (symm && symm[i]) {
49         PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
50       }
51       PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
52     }
53   }
54   PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
55   PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
56   PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
57   PetscCall(MatDestroy(&preallocator));
58   PetscFunctionReturn(0);
59 }
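
/*
   A minimal sketch of the MATPREALLOCATOR workflow the helper above relies on:
   a preallocator matrix records the nonzero pattern implied by MatSetValues()
   calls and then transfers that pattern to the real matrix.  Variable names and
   sizes below are illustrative only.

     Mat pattern;

     MatCreate(comm,&pattern);
     MatSetType(pattern,MATPREALLOCATOR);
     MatSetSizes(pattern,m,n,M,N);
     MatSetUp(pattern);
     // insert the same (row,col) locations that will later be inserted into B
     MatSetValues(pattern,1,&row,ncols,cols,vals,INSERT_VALUES);
     MatAssemblyBegin(pattern,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(pattern,MAT_FINAL_ASSEMBLY);
     MatPreallocatorPreallocate(pattern,PETSC_TRUE,B); // preallocate B from the recorded pattern
     MatDestroy(&pattern);
*/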
60 
61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
62 {
63   Mat            B;
64   PetscInt       r;
65 
66   PetscFunctionBegin;
67   if (reuse != MAT_REUSE_MATRIX) {
68     PetscBool symm = PETSC_TRUE,isdense;
69     PetscInt  bs;
70 
71     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
72     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
73     PetscCall(MatSetType(B,newtype));
74     PetscCall(MatGetBlockSize(A,&bs));
75     PetscCall(MatSetBlockSize(B,bs));
76     PetscCall(PetscLayoutSetUp(B->rmap));
77     PetscCall(PetscLayoutSetUp(B->cmap));
78     PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
79     if (!isdense) {
80       PetscCall(MatGetRowUpperTriangular(A));
81       PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
82       PetscCall(MatRestoreRowUpperTriangular(A));
83     } else {
84       PetscCall(MatSetUp(B));
85     }
86   } else {
87     B    = *newmat;
88     PetscCall(MatZeroEntries(B));
89   }
90 
91   PetscCall(MatGetRowUpperTriangular(A));
92   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
93     PetscInt          ncols;
94     const PetscInt    *row;
95     const PetscScalar *vals;
96 
97     PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
98     PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
99 #if defined(PETSC_USE_COMPLEX)
100     if (A->hermitian == PETSC_BOOL3_TRUE) {
101       PetscInt i;
102       for (i = 0; i < ncols; i++) {
103         PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
104       }
105     } else {
106       PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
107     }
108 #else
109     PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
110 #endif
111     PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
112   }
113   PetscCall(MatRestoreRowUpperTriangular(A));
114   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
116 
117   if (reuse == MAT_INPLACE_MATRIX) {
118     PetscCall(MatHeaderReplace(A,&B));
119   } else {
120     *newmat = B;
121   }
122   PetscFunctionReturn(0);
123 }
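
/*
   A minimal user-level sketch of driving the conversion above through the
   public API (MATMPIAIJ as the target type is just an example):

     Mat C;

     MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&C);  // create and fill C from the SBAIJ matrix A
     ...
     MatConvert(A,MATMPIAIJ,MAT_REUSE_MATRIX,&C);    // refill C after the values of A change
     MatDestroy(&C);
*/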
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
144 
145 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
146   { \
147     brow = row/bs;  \
148     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149     rmax = aimax[brow]; nrow = ailen[brow]; \
150     bcol = col/bs; \
151     ridx = row % bs; cidx = col % bs; \
152     low  = 0; high = nrow; \
153     while (high-low > 3) { \
154       t = (low+high)/2; \
155       if (rp[t] > bcol) high = t; \
156       else              low  = t; \
157     } \
158     for (_i=low; _i<high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2*_i + bs*cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value;  \
163         else                    *bap  = value;  \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
170     N = nrow++ - 1;  \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
173     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
174     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
175     rp[_i]                      = bcol;  \
176     ap[bs2*_i + bs*cidx + ridx] = value;  \
177     A->nonzerostate++;\
178 a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
183   { \
184     brow = row/bs;  \
185     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
186     rmax = bimax[brow]; nrow = bilen[brow]; \
187     bcol = col/bs; \
188     ridx = row % bs; cidx = col % bs; \
189     low  = 0; high = nrow; \
190     while (high-low > 3) { \
191       t = (low+high)/2; \
192       if (rp[t] > bcol) high = t; \
193       else              low  = t; \
194     } \
195     for (_i=low; _i<high; _i++) { \
196       if (rp[_i] > bcol) break; \
197       if (rp[_i] == bcol) { \
198         bap = ap + bs2*_i + bs*cidx + ridx; \
199         if (addv == ADD_VALUES) *bap += value;  \
200         else                    *bap  = value;  \
201         goto b_noinsert; \
202       } \
203     } \
204     if (b->nonew == 1) goto b_noinsert; \
205     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
206     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
207     N = nrow++ - 1;  \
208     /* shift up all the later entries in this row */ \
209     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
210     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
211     PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
212     rp[_i]                      = bcol;  \
213     ap[bs2*_i + bs*cidx + ridx] = value;  \
214     B->nonzerostate++;\
215 b_noinsert:; \
216     bilen[brow] = nrow; \
217   }
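
/*
   Worked example of the index arithmetic used by the two macros above
   (illustrative numbers): with bs = 3 a local scalar entry (row,col) = (7,5)
   maps to block (brow,bcol) = (7/3,5/3) = (2,1) with intra-block offsets
   (ridx,cidx) = (7%3,5%3) = (1,2).  Values are stored column-major within each
   bs*bs block, so the entry lands at offset bs*cidx + ridx = 3*2 + 1 = 7 of the
   block that starts at aa + bs2*_i (respectively ba + bs2*_i).
*/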
218 
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j supplied by the user is ignored or generates an error
221 */
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some variables required by the macros above */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) {
285           continue;
286         } else {
287           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
288           /* off-diag entry (B) */
289           if (mat->was_assembled) {
290             if (!baij->colmap) {
291               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
292             }
293 #if defined(PETSC_USE_CTABLE)
294             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
295             col  = col - 1;
296 #else
297             col = baij->colmap[in[j]/bs] - 1;
298 #endif
299             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
300               PetscCall(MatDisAssemble_MPISBAIJ(mat));
301               col  =  in[j];
302               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
303               B    = baij->B;
304               b    = (Mat_SeqBAIJ*)(B)->data;
305               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
306               ba   = b->a;
307             } else col += in[j]%bs;
308           } else col = in[j];
309           if (roworiented) value = v[i*n+j];
310           else             value = v[i+j*m];
311           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
312           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
313         }
314       }
315     } else {  /* off processor entry */
316       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
317       if (!baij->donotstash) {
318         mat->assembled = PETSC_FALSE;
319         n_loc          = 0;
320         for (j=0; j<n; j++) {
321           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
322           in_loc[n_loc] = in[j];
323           if (roworiented) {
324             v_loc[n_loc] = v[i*n+j];
325           } else {
326             v_loc[n_loc] = v[j*m+i];
327           }
328           n_loc++;
329         }
330         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
331       }
332     }
333   }
334   PetscFunctionReturn(0);
335 }
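
/*
   A minimal user-level sketch of scalar insertion into this format (global
   indices; only entries whose block row is <= the block column are kept):

     PetscInt    i = 3, j = 5;    // global row/column, i <= j
     PetscScalar v = 1.0;

     MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE); // optionally drop i > j silently
     MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/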
336 
337 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
338 {
339   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
340   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
341   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
342   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
343   PetscBool         roworiented=a->roworiented;
344   const PetscScalar *value     = v;
345   MatScalar         *ap,*aa = a->a,*bap;
346 
347   PetscFunctionBegin;
348   if (col < row) {
349     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
350     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
351   }
352   rp   = aj + ai[row];
353   ap   = aa + bs2*ai[row];
354   rmax = imax[row];
355   nrow = ailen[row];
356   value = v;
357   low   = 0;
358   high  = nrow;
359 
360   while (high-low > 7) {
361     t = (low+high)/2;
362     if (rp[t] > col) high = t;
363     else             low  = t;
364   }
365   for (i=low; i<high; i++) {
366     if (rp[i] > col) break;
367     if (rp[i] == col) {
368       bap = ap +  bs2*i;
369       if (roworiented) {
370         if (is == ADD_VALUES) {
371           for (ii=0; ii<bs; ii++) {
372             for (jj=ii; jj<bs2; jj+=bs) {
373               bap[jj] += *value++;
374             }
375           }
376         } else {
377           for (ii=0; ii<bs; ii++) {
378             for (jj=ii; jj<bs2; jj+=bs) {
379               bap[jj] = *value++;
380             }
381           }
382         }
383       } else {
384         if (is == ADD_VALUES) {
385           for (ii=0; ii<bs; ii++) {
386             for (jj=0; jj<bs; jj++) {
387               *bap++ += *value++;
388             }
389           }
390         } else {
391           for (ii=0; ii<bs; ii++) {
392             for (jj=0; jj<bs; jj++) {
393               *bap++  = *value++;
394             }
395           }
396         }
397       }
398       goto noinsert2;
399     }
400   }
401   if (nonew == 1) goto noinsert2;
402   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
403   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
404   N = nrow++ - 1; high++;
405   /* shift up all the later entries in this row */
406   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
407   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
408   rp[i] = col;
409   bap   = ap +  bs2*i;
410   if (roworiented) {
411     for (ii=0; ii<bs; ii++) {
412       for (jj=ii; jj<bs2; jj+=bs) {
413         bap[jj] = *value++;
414       }
415     }
416   } else {
417     for (ii=0; ii<bs; ii++) {
418       for (jj=0; jj<bs; jj++) {
419         *bap++ = *value++;
420       }
421     }
422   }
423   noinsert2:;
424   ailen[row] = nrow;
425   PetscFunctionReturn(0);
426 }
427 
428 /*
429    This routine is exactly duplicated in mpibaij.c
430 */
431 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
432 {
433   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
434   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
435   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
436   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
437   PetscBool         roworiented=a->roworiented;
438   const PetscScalar *value     = v;
439   MatScalar         *ap,*aa = a->a,*bap;
440 
441   PetscFunctionBegin;
442   rp   = aj + ai[row];
443   ap   = aa + bs2*ai[row];
444   rmax = imax[row];
445   nrow = ailen[row];
446   low  = 0;
447   high = nrow;
448   value = v;
449   while (high-low > 7) {
450     t = (low+high)/2;
451     if (rp[t] > col) high = t;
452     else             low  = t;
453   }
454   for (i=low; i<high; i++) {
455     if (rp[i] > col) break;
456     if (rp[i] == col) {
457       bap = ap +  bs2*i;
458       if (roworiented) {
459         if (is == ADD_VALUES) {
460           for (ii=0; ii<bs; ii++) {
461             for (jj=ii; jj<bs2; jj+=bs) {
462               bap[jj] += *value++;
463             }
464           }
465         } else {
466           for (ii=0; ii<bs; ii++) {
467             for (jj=ii; jj<bs2; jj+=bs) {
468               bap[jj] = *value++;
469             }
470           }
471         }
472       } else {
473         if (is == ADD_VALUES) {
474           for (ii=0; ii<bs; ii++,value+=bs) {
475             for (jj=0; jj<bs; jj++) {
476               bap[jj] += value[jj];
477             }
478             bap += bs;
479           }
480         } else {
481           for (ii=0; ii<bs; ii++,value+=bs) {
482             for (jj=0; jj<bs; jj++) {
483               bap[jj]  = value[jj];
484             }
485             bap += bs;
486           }
487         }
488       }
489       goto noinsert2;
490     }
491   }
492   if (nonew == 1) goto noinsert2;
493   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
494   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
495   N = nrow++ - 1; high++;
496   /* shift up all the later entries in this row */
497   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
498   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
499   rp[i] = col;
500   bap   = ap +  bs2*i;
501   if (roworiented) {
502     for (ii=0; ii<bs; ii++) {
503       for (jj=ii; jj<bs2; jj+=bs) {
504         bap[jj] = *value++;
505       }
506     }
507   } else {
508     for (ii=0; ii<bs; ii++) {
509       for (jj=0; jj<bs; jj++) {
510         *bap++ = *value++;
511       }
512     }
513   }
514   noinsert2:;
515   ailen[row] = nrow;
516   PetscFunctionReturn(0);
517 }
518 
519 /*
520     This routine could be optimized by removing the need for the block copy below and passing stride information
521   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
522 */
523 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
524 {
525   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
526   const MatScalar *value;
527   MatScalar       *barray     =baij->barray;
528   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
529   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
530   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
531   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
532 
533   PetscFunctionBegin;
534   if (!barray) {
535     PetscCall(PetscMalloc1(bs2,&barray));
536     baij->barray = barray;
537   }
538 
539   if (roworiented) {
540     stepval = (n-1)*bs;
541   } else {
542     stepval = (m-1)*bs;
543   }
544   for (i=0; i<m; i++) {
545     if (im[i] < 0) continue;
546     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
547     if (im[i] >= rstart && im[i] < rend) {
548       row = im[i] - rstart;
549       for (j=0; j<n; j++) {
550         if (im[i] > in[j]) {
551           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
552           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
553         }
554         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
555         if ((roworiented) && (n == 1)) {
556           barray = (MatScalar*) v + i*bs2;
557         } else if ((!roworiented) && (m == 1)) {
558           barray = (MatScalar*) v + j*bs2;
559         } else { /* Here a copy is required */
560           if (roworiented) {
561             value = v + i*(stepval+bs)*bs + j*bs;
562           } else {
563             value = v + j*(stepval+bs)*bs + i*bs;
564           }
565           for (ii=0; ii<bs; ii++,value+=stepval) {
566             for (jj=0; jj<bs; jj++) {
567               *barray++ = *value++;
568             }
569           }
570           barray -=bs2;
571         }
572 
573         if (in[j] >= cstart && in[j] < cend) {
574           col  = in[j] - cstart;
575           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
576         } else if (in[j] < 0) {
577           continue;
578         } else {
579           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
580           if (mat->was_assembled) {
581             if (!baij->colmap) {
582               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
583             }
584 
585 #if defined(PETSC_USE_DEBUG)
586 #if defined(PETSC_USE_CTABLE)
587             { PetscInt data;
588               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
589               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590             }
591 #else
592             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
593 #endif
594 #endif
595 #if defined(PETSC_USE_CTABLE)
596             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
597             col  = (col - 1)/bs;
598 #else
599             col = (baij->colmap[in[j]] - 1)/bs;
600 #endif
601             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
602               PetscCall(MatDisAssemble_MPISBAIJ(mat));
603               col  = in[j];
604             }
605           } else col = in[j];
606           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
607         }
608       }
609     } else {
610       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
611       if (!baij->donotstash) {
612         if (roworiented) {
613           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
614         } else {
615           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
616         }
617       }
618     }
619   }
620   PetscFunctionReturn(0);
621 }
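
/*
   A minimal user-level sketch of blocked insertion (block indices plus one full
   bs*bs block of values; with the default row-oriented option the block is
   supplied row by row):

     PetscInt    brow = 1, bcol = 2;      // block indices, brow <= bcol
     PetscScalar vals[] = {1.0, 2.0,      // a 2x2 block for bs = 2
                           3.0, 4.0};

     MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);
*/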
622 
623 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
624 {
625   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
626   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
627   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
628 
629   PetscFunctionBegin;
630   for (i=0; i<m; i++) {
631     if (idxm[i] < 0) continue; /* negative row */
632     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
633     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
634       row = idxm[i] - bsrstart;
635       for (j=0; j<n; j++) {
636         if (idxn[j] < 0) continue; /* negative column */
637         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
638         if (idxn[j] >= bscstart && idxn[j] < bscend) {
639           col  = idxn[j] - bscstart;
640           PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
641         } else {
642           if (!baij->colmap) {
643             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
644           }
645 #if defined(PETSC_USE_CTABLE)
646           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
647           data--;
648 #else
649           data = baij->colmap[idxn[j]/bs]-1;
650 #endif
651           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
652           else {
653             col  = data + idxn[j]%bs;
654             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
655           }
656         }
657       }
658     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
659   }
660   PetscFunctionReturn(0);
661 }
662 
663 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
664 {
665   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
666   PetscReal      sum[2],*lnorm2;
667 
668   PetscFunctionBegin;
669   if (baij->size == 1) {
670     PetscCall(MatNorm(baij->A,type,norm));
671   } else {
672     if (type == NORM_FROBENIUS) {
673       PetscCall(PetscMalloc1(2,&lnorm2));
674       PetscCall(MatNorm(baij->A,type,lnorm2));
675       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
676       PetscCall(MatNorm(baij->B,type,lnorm2));
677       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
678       PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
679       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
680       PetscCall(PetscFree(lnorm2));
681     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
682       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
683       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
684       PetscReal    *rsum,*rsum2,vabs;
685       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
686       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
687       MatScalar    *v;
688 
689       PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
690       PetscCall(PetscArrayzero(rsum,mat->cmap->N));
691       /* Amat */
692       v = amat->a; jj = amat->j;
693       for (brow=0; brow<mbs; brow++) {
694         grow = bs*(rstart + brow);
695         nz   = amat->i[brow+1] - amat->i[brow];
696         for (bcol=0; bcol<nz; bcol++) {
697           gcol = bs*(rstart + *jj); jj++;
698           for (col=0; col<bs; col++) {
699             for (row=0; row<bs; row++) {
700               vabs            = PetscAbsScalar(*v); v++;
701               rsum[gcol+col] += vabs;
702               /* non-diagonal block */
703               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
704             }
705           }
706         }
707         PetscCall(PetscLogFlops(nz*bs*bs));
708       }
709       /* Bmat */
710       v = bmat->a; jj = bmat->j;
711       for (brow=0; brow<mbs; brow++) {
712         grow = bs*(rstart + brow);
713         nz = bmat->i[brow+1] - bmat->i[brow];
714         for (bcol=0; bcol<nz; bcol++) {
715           gcol = bs*garray[*jj]; jj++;
716           for (col=0; col<bs; col++) {
717             for (row=0; row<bs; row++) {
718               vabs            = PetscAbsScalar(*v); v++;
719               rsum[gcol+col] += vabs;
720               rsum[grow+row] += vabs;
721             }
722           }
723         }
724         PetscCall(PetscLogFlops(nz*bs*bs));
725       }
726       PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
727       *norm = 0.0;
728       for (col=0; col<mat->cmap->N; col++) {
729         if (rsum2[col] > *norm) *norm = rsum2[col];
730       }
731       PetscCall(PetscFree2(rsum,rsum2));
732     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
733   }
734   PetscFunctionReturn(0);
735 }
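
/*
   Reading of the Frobenius branch above: the off-diagonal part B stores each
   coupling block only once, so its contribution is counted twice, i.e. the code
   computes norm = sqrt(sum_ranks ||A_d||_F^2 + 2 * sum_ranks ||B_o||_F^2),
   where ||A_d||_F is the (already symmetry-aware) norm of the local SBAIJ
   diagonal block and ||B_o||_F the norm of the local off-diagonal BAIJ block.
*/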
736 
737 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
738 {
739   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
740   PetscInt       nstash,reallocs;
741 
742   PetscFunctionBegin;
743   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
744 
745   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
746   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
747   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
748   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
749   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
750   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
751   PetscFunctionReturn(0);
752 }
753 
754 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
755 {
756   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
757   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
758   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
759   PetscInt       *row,*col;
760   PetscBool      other_disassembled;
761   PetscMPIInt    n;
762   PetscBool      r1,r2,r3;
763   MatScalar      *val;
764 
765   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
766   PetscFunctionBegin;
767   if (!baij->donotstash &&  !mat->nooffprocentries) {
768     while (1) {
769       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
770       if (!flg) break;
771 
772       for (i=0; i<n;) {
773         /* Now identify the consecutive vals belonging to the same row */
774         for (j=i,rstart=row[j]; j<n; j++) {
775           if (row[j] != rstart) break;
776         }
777         if (j < n) ncols = j-i;
778         else       ncols = n-i;
779         /* Now assemble all these values with a single function call */
780         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
781         i    = j;
782       }
783     }
784     PetscCall(MatStashScatterEnd_Private(&mat->stash));
785     /* Now process the block-stash. Since the values are stashed column-oriented,
786        temporarily clear the roworiented flags and restore the original flags
787        after the MatSetValuesBlocked() calls */
788     r1 = baij->roworiented;
789     r2 = a->roworiented;
790     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
791 
792     baij->roworiented = PETSC_FALSE;
793     a->roworiented    = PETSC_FALSE;
794 
795     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
796     while (1) {
797       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
798       if (!flg) break;
799 
800       for (i=0; i<n;) {
801         /* Now identify the consecutive vals belonging to the same row */
802         for (j=i,rstart=row[j]; j<n; j++) {
803           if (row[j] != rstart) break;
804         }
805         if (j < n) ncols = j-i;
806         else       ncols = n-i;
807         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
808         i    = j;
809       }
810     }
811     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
812 
813     baij->roworiented = r1;
814     a->roworiented    = r2;
815 
816     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
817   }
818 
819   PetscCall(MatAssemblyBegin(baij->A,mode));
820   PetscCall(MatAssemblyEnd(baij->A,mode));
821 
822   /* determine if any processor has disassembled, if so we must
823      also disassemble ourselves, in order that we may reassemble. */
824   /*
825      if nonzero structure of submatrix B cannot change then we know that
826      no processor disassembled thus we can skip this stuff
827   */
828   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
829     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat)));
830     if (mat->was_assembled && !other_disassembled) {
831       PetscCall(MatDisAssemble_MPISBAIJ(mat));
832     }
833   }
834 
835   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
836     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
837   }
838   PetscCall(MatAssemblyBegin(baij->B,mode));
839   PetscCall(MatAssemblyEnd(baij->B,mode));
840 
841   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
842 
843   baij->rowvalues = NULL;
844 
845   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
846   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
847     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
848     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
849   }
850   PetscFunctionReturn(0);
851 }
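
/*
   Sketch of the off-process assembly path finished above: values destined for
   rows owned by other ranks are stashed by MatSetValues_MPISBAIJ() and
   MatSetValuesBlocked_MPISBAIJ(), shipped in MatAssemblyBegin_MPISBAIJ() via
   MatStashScatterBegin_Private(), and drained here row by row with
   MatStashScatterGetMesg_Private() before the local A and B parts are
   assembled.
*/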
852 
853 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
854 #include <petscdraw.h>
855 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
856 {
857   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
858   PetscInt          bs   = mat->rmap->bs;
859   PetscMPIInt       rank = baij->rank;
860   PetscBool         iascii,isdraw;
861   PetscViewer       sviewer;
862   PetscViewerFormat format;
863 
864   PetscFunctionBegin;
865   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
866   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
867   if (iascii) {
868     PetscCall(PetscViewerGetFormat(viewer,&format));
869     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
870       MatInfo info;
871       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
872       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
873       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
874       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
875       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
876       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
877       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
878       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
879       PetscCall(PetscViewerFlush(viewer));
880       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
881       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
882       PetscCall(VecScatterView(baij->Mvctx,viewer));
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_INFO) {
885       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
886       PetscFunctionReturn(0);
887     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
888       PetscFunctionReturn(0);
889     }
890   }
891 
892   if (isdraw) {
893     PetscDraw draw;
894     PetscBool isnull;
895     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
896     PetscCall(PetscDrawIsNull(draw,&isnull));
897     if (isnull) PetscFunctionReturn(0);
898   }
899 
900   {
901     /* assemble the entire matrix onto first processor. */
902     Mat          A;
903     Mat_SeqSBAIJ *Aloc;
904     Mat_SeqBAIJ  *Bloc;
905     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
906     MatScalar    *a;
907     const char   *matname;
908 
909     /* Should this be the same type as mat? */
910     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
911     if (rank == 0) {
912       PetscCall(MatSetSizes(A,M,N,M,N));
913     } else {
914       PetscCall(MatSetSizes(A,0,0,M,N));
915     }
916     PetscCall(MatSetType(A,MATMPISBAIJ));
917     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
918     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
919     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
920 
921     /* copy over the A part */
922     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
923     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
924     PetscCall(PetscMalloc1(bs,&rvals));
925 
926     for (i=0; i<mbs; i++) {
927       rvals[0] = bs*(baij->rstartbs + i);
928       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
929       for (j=ai[i]; j<ai[i+1]; j++) {
930         col = (baij->cstartbs+aj[j])*bs;
931         for (k=0; k<bs; k++) {
932           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
933           col++;
934           a += bs;
935         }
936       }
937     }
938     /* copy over the B part */
939     Bloc = (Mat_SeqBAIJ*)baij->B->data;
940     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
941     for (i=0; i<mbs; i++) {
942 
943       rvals[0] = bs*(baij->rstartbs + i);
944       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
945       for (j=ai[i]; j<ai[i+1]; j++) {
946         col = baij->garray[aj[j]]*bs;
947         for (k=0; k<bs; k++) {
948           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
949           col++;
950           a += bs;
951         }
952       }
953     }
954     PetscCall(PetscFree(rvals));
955     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
956     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
957     /*
958        Everyone has to call to draw the matrix since the graphics waits are
959        synchronized across all processors that share the PetscDraw object
960     */
961     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
962     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
963     if (rank == 0) {
964       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
965       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
966     }
967     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
968     PetscCall(PetscViewerFlush(viewer));
969     PetscCall(MatDestroy(&A));
970   }
971   PetscFunctionReturn(0);
972 }
973 
974 /* Used for both MPIBAIJ and MPISBAIJ matrices */
975 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
976 
977 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
978 {
979   PetscBool      iascii,isdraw,issocket,isbinary;
980 
981   PetscFunctionBegin;
982   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
983   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
984   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
985   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
986   if (iascii || isdraw || issocket) {
987     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
988   } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
989   PetscFunctionReturn(0);
990 }
991 
992 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
993 {
994   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
995 
996   PetscFunctionBegin;
997 #if defined(PETSC_USE_LOG)
998   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
999 #endif
1000   PetscCall(MatStashDestroy_Private(&mat->stash));
1001   PetscCall(MatStashDestroy_Private(&mat->bstash));
1002   PetscCall(MatDestroy(&baij->A));
1003   PetscCall(MatDestroy(&baij->B));
1004 #if defined(PETSC_USE_CTABLE)
1005   PetscCall(PetscTableDestroy(&baij->colmap));
1006 #else
1007   PetscCall(PetscFree(baij->colmap));
1008 #endif
1009   PetscCall(PetscFree(baij->garray));
1010   PetscCall(VecDestroy(&baij->lvec));
1011   PetscCall(VecScatterDestroy(&baij->Mvctx));
1012   PetscCall(VecDestroy(&baij->slvec0));
1013   PetscCall(VecDestroy(&baij->slvec0b));
1014   PetscCall(VecDestroy(&baij->slvec1));
1015   PetscCall(VecDestroy(&baij->slvec1a));
1016   PetscCall(VecDestroy(&baij->slvec1b));
1017   PetscCall(VecScatterDestroy(&baij->sMvctx));
1018   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1019   PetscCall(PetscFree(baij->barray));
1020   PetscCall(PetscFree(baij->hd));
1021   PetscCall(VecDestroy(&baij->diag));
1022   PetscCall(VecDestroy(&baij->bb1));
1023   PetscCall(VecDestroy(&baij->xx1));
1024 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1025   PetscCall(PetscFree(baij->setvaluescopy));
1026 #endif
1027   PetscCall(PetscFree(baij->in_loc));
1028   PetscCall(PetscFree(baij->v_loc));
1029   PetscCall(PetscFree(baij->rangebs));
1030   PetscCall(PetscFree(mat->data));
1031 
1032   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1034   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1035   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1036   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1037 #if defined(PETSC_HAVE_ELEMENTAL)
1038   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1039 #endif
1040 #if defined(PETSC_HAVE_SCALAPACK)
1041   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1042 #endif
1043   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1044   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1049 {
1050   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1051   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1052   PetscScalar       *from;
1053   const PetscScalar *x;
1054 
1055   PetscFunctionBegin;
1056   /* diagonal part */
1057   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1058   PetscCall(VecSet(a->slvec1b,0.0));
1059 
1060   /* subdiagonal part */
1061   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1062   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1063 
1064   /* copy x into the vec slvec0 */
1065   PetscCall(VecGetArray(a->slvec0,&from));
1066   PetscCall(VecGetArrayRead(xx,&x));
1067 
1068   PetscCall(PetscArraycpy(from,x,bs*mbs));
1069   PetscCall(VecRestoreArray(a->slvec0,&from));
1070   PetscCall(VecRestoreArrayRead(xx,&x));
1071 
1072   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1073   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1074   /* superdiagonal part */
1075   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1082   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1083   PetscScalar       *from;
1084   const PetscScalar *x;
1085 
1086   PetscFunctionBegin;
1087   /* diagonal part */
1088   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1089   PetscCall(VecSet(a->slvec1b,0.0));
1090 
1091   /* subdiagonal part */
1092   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1093 
1094   /* copy x into the vec slvec0 */
1095   PetscCall(VecGetArray(a->slvec0,&from));
1096   PetscCall(VecGetArrayRead(xx,&x));
1097 
1098   PetscCall(PetscArraycpy(from,x,bs*mbs));
1099   PetscCall(VecRestoreArray(a->slvec0,&from));
1100   PetscCall(VecRestoreArrayRead(xx,&x));
1101 
1102   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1103   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1104   /* superdiagonal part */
1105   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1106   PetscFunctionReturn(0);
1107 }
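
/*
   Sketch of the product formed by the two MatMult routines above: only one
   triangle of the coupling blocks is stored, so besides the usual
   y_loc = A_d x_loc + B x_ghost the transposed contribution B^T x_loc must be
   sent to the ranks that own those rows.  slvec0 packs [x_loc ; B^T x_loc], the
   scatter sMvctx adds it into slvec1 = [slvec1a ; slvec1b] (accumulating remote
   B^T x contributions into slvec1a and gathering ghost values of x into
   slvec1b), and the final multadd computes yy = B*slvec1b + slvec1a.
*/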
1108 
1109 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1110 {
1111   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1112   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1113   PetscScalar       *from,zero=0.0;
1114   const PetscScalar *x;
1115 
1116   PetscFunctionBegin;
1117   /* diagonal part */
1118   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1119   PetscCall(VecSet(a->slvec1b,zero));
1120 
1121   /* subdiagonal part */
1122   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1123   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1124 
1125   /* copy x into the vec slvec0 */
1126   PetscCall(VecGetArray(a->slvec0,&from));
1127   PetscCall(VecGetArrayRead(xx,&x));
1128   PetscCall(PetscArraycpy(from,x,bs*mbs));
1129   PetscCall(VecRestoreArray(a->slvec0,&from));
1130 
1131   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1132   PetscCall(VecRestoreArrayRead(xx,&x));
1133   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1134 
1135   /* superdiagonal part */
1136   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1141 {
1142   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1143   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1144   PetscScalar       *from,zero=0.0;
1145   const PetscScalar *x;
1146 
1147   PetscFunctionBegin;
1148   /* diagonal part */
1149   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1150   PetscCall(VecSet(a->slvec1b,zero));
1151 
1152   /* subdiagonal part */
1153   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1154 
1155   /* copy x into the vec slvec0 */
1156   PetscCall(VecGetArray(a->slvec0,&from));
1157   PetscCall(VecGetArrayRead(xx,&x));
1158   PetscCall(PetscArraycpy(from,x,bs*mbs));
1159   PetscCall(VecRestoreArray(a->slvec0,&from));
1160 
1161   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1162   PetscCall(VecRestoreArrayRead(xx,&x));
1163   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1164 
1165   /* superdiagonal part */
1166   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 /*
1171   This only works correctly for square matrices where the subblock A->A is the
1172    diagonal block
1173 */
1174 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1175 {
1176   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1177 
1178   PetscFunctionBegin;
1179   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1180   PetscCall(MatGetDiagonal(a->A,v));
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1185 {
1186   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1187 
1188   PetscFunctionBegin;
1189   PetscCall(MatScale(a->A,aa));
1190   PetscCall(MatScale(a->B,aa));
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1195 {
1196   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1197   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1198   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1199   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1200   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1201 
1202   PetscFunctionBegin;
1203   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1204   mat->getrowactive = PETSC_TRUE;
1205 
1206   if (!mat->rowvalues && (idx || v)) {
1207     /*
1208         allocate enough space to hold information from the longest row.
1209     */
1210     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1211     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1212     PetscInt     max = 1,mbs = mat->mbs,tmp;
1213     for (i=0; i<mbs; i++) {
1214       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1215       if (max < tmp) max = tmp;
1216     }
1217     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1218   }
1219 
1220   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1221   lrow = row - brstart;  /* local row index */
1222 
1223   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1224   if (!v)   {pvA = NULL; pvB = NULL;}
1225   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1226   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1227   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1228   nztot = nzA + nzB;
1229 
1230   cmap = mat->garray;
1231   if (v  || idx) {
1232     if (nztot) {
1233       /* Sort by increasing column numbers, assuming A and B already sorted */
1234       PetscInt imark = -1;
1235       if (v) {
1236         *v = v_p = mat->rowvalues;
1237         for (i=0; i<nzB; i++) {
1238           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1239           else break;
1240         }
1241         imark = i;
1242         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1243         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1244       }
1245       if (idx) {
1246         *idx = idx_p = mat->rowindices;
1247         if (imark > -1) {
1248           for (i=0; i<imark; i++) {
1249             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1250           }
1251         } else {
1252           for (i=0; i<nzB; i++) {
1253             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1254             else break;
1255           }
1256           imark = i;
1257         }
1258         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1259         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1260       }
1261     } else {
1262       if (idx) *idx = NULL;
1263       if (v)   *v   = NULL;
1264     }
1265   }
1266   *nz  = nztot;
1267   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1268   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1273 {
1274   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1275 
1276   PetscFunctionBegin;
1277   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1278   baij->getrowactive = PETSC_FALSE;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1283 {
1284   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1285   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1286 
1287   PetscFunctionBegin;
1288   aA->getrow_utriangular = PETSC_TRUE;
1289   PetscFunctionReturn(0);
1290 }
1291 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1292 {
1293   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1294   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1295 
1296   PetscFunctionBegin;
1297   aA->getrow_utriangular = PETSC_FALSE;
1298   PetscFunctionReturn(0);
1299 }
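
/*
   A minimal user-level sketch of row access for this format: MatGetRow() on an
   SBAIJ matrix returns only the stored (upper-triangular) part of each row and
   must be bracketed by the two calls above, as in

     MatGetRowUpperTriangular(mat);
     for (r = rstart; r < rend; r++) {
       MatGetRow(mat,r,&ncols,&cols,&vals);
       // use ncols, cols, vals
       MatRestoreRow(mat,r,&ncols,&cols,&vals);
     }
     MatRestoreRowUpperTriangular(mat);
*/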
1300 
1301 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1302 {
1303   PetscFunctionBegin;
1304   if (PetscDefined(USE_COMPLEX)) {
1305     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1306 
1307     PetscCall(MatConjugate(a->A));
1308     PetscCall(MatConjugate(a->B));
1309   }
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1316 
1317   PetscFunctionBegin;
1318   PetscCall(MatRealPart(a->A));
1319   PetscCall(MatRealPart(a->B));
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1324 {
1325   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1326 
1327   PetscFunctionBegin;
1328   PetscCall(MatImaginaryPart(a->A));
1329   PetscCall(MatImaginaryPart(a->B));
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1334    Input: isrow       - distributed (parallel),
1335           iscol_local - locally owned (seq)
1336 */
1337 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1338 {
1339   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1340   const PetscInt *ptr1,*ptr2;
1341 
1342   PetscFunctionBegin;
1343   PetscCall(ISGetLocalSize(isrow,&sz1));
1344   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1345   if (sz1 > sz2) {
1346     *flg = PETSC_FALSE;
1347     PetscFunctionReturn(0);
1348   }
1349 
1350   PetscCall(ISGetIndices(isrow,&ptr1));
1351   PetscCall(ISGetIndices(iscol_local,&ptr2));
1352 
1353   PetscCall(PetscMalloc1(sz1,&a1));
1354   PetscCall(PetscMalloc1(sz2,&a2));
1355   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1356   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1357   PetscCall(PetscSortInt(sz1,a1));
1358   PetscCall(PetscSortInt(sz2,a2));
1359 
1360   nmatch=0;
1361   k     = 0;
1362   for (i=0; i<sz1; i++) {
1363     for (j=k; j<sz2; j++) {
1364       if (a1[i] == a2[j]) {
1365         k = j; nmatch++;
1366         break;
1367       }
1368     }
1369   }
1370   PetscCall(ISRestoreIndices(isrow,&ptr1));
1371   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1372   PetscCall(PetscFree(a1));
1373   PetscCall(PetscFree(a2));
1374   if (nmatch < sz1) {
1375     *flg = PETSC_FALSE;
1376   } else {
1377     *flg = PETSC_TRUE;
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1383 {
1384   IS             iscol_local;
1385   PetscInt       csize;
1386   PetscBool      isequal;
1387 
1388   PetscFunctionBegin;
1389   PetscCall(ISGetLocalSize(iscol,&csize));
1390   if (call == MAT_REUSE_MATRIX) {
1391     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1392     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1393   } else {
1394     PetscBool issorted;
1395 
1396     PetscCall(ISAllGather(iscol,&iscol_local));
1397     PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
1398     PetscCall(ISSorted(iscol_local, &issorted));
1399     PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1400   }
1401 
1402   /* now call MatCreateSubMatrix_MPIBAIJ_Private() */
1403   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1404   if (call == MAT_INITIAL_MATRIX) {
1405     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1406     PetscCall(ISDestroy(&iscol_local));
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1414 
1415   PetscFunctionBegin;
1416   PetscCall(MatZeroEntries(l->A));
1417   PetscCall(MatZeroEntries(l->B));
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1422 {
1423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1424   Mat            A  = a->A,B = a->B;
1425   PetscLogDouble isend[5],irecv[5];
1426 
1427   PetscFunctionBegin;
1428   info->block_size = (PetscReal)matin->rmap->bs;
1429 
1430   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1431 
1432   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1433   isend[3] = info->memory;  isend[4] = info->mallocs;
1434 
1435   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1436 
1437   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1438   isend[3] += info->memory;  isend[4] += info->mallocs;
1439   if (flag == MAT_LOCAL) {
1440     info->nz_used      = isend[0];
1441     info->nz_allocated = isend[1];
1442     info->nz_unneeded  = isend[2];
1443     info->memory       = isend[3];
1444     info->mallocs      = isend[4];
1445   } else if (flag == MAT_GLOBAL_MAX) {
1446     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1447 
1448     info->nz_used      = irecv[0];
1449     info->nz_allocated = irecv[1];
1450     info->nz_unneeded  = irecv[2];
1451     info->memory       = irecv[3];
1452     info->mallocs      = irecv[4];
1453   } else if (flag == MAT_GLOBAL_SUM) {
1454     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1455 
1456     info->nz_used      = irecv[0];
1457     info->nz_allocated = irecv[1];
1458     info->nz_unneeded  = irecv[2];
1459     info->memory       = irecv[3];
1460     info->mallocs      = irecv[4];
1461   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1462   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1463   info->fill_ratio_needed = 0;
1464   info->factor_mallocs    = 0;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1469 {
1470   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1471   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1472 
1473   PetscFunctionBegin;
1474   switch (op) {
1475   case MAT_NEW_NONZERO_LOCATIONS:
1476   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1477   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1478   case MAT_KEEP_NONZERO_PATTERN:
1479   case MAT_SUBMAT_SINGLEIS:
1480   case MAT_NEW_NONZERO_LOCATION_ERR:
1481     MatCheckPreallocated(A,1);
1482     PetscCall(MatSetOption(a->A,op,flg));
1483     PetscCall(MatSetOption(a->B,op,flg));
1484     break;
1485   case MAT_ROW_ORIENTED:
1486     MatCheckPreallocated(A,1);
1487     a->roworiented = flg;
1488 
1489     PetscCall(MatSetOption(a->A,op,flg));
1490     PetscCall(MatSetOption(a->B,op,flg));
1491     break;
1492   case MAT_FORCE_DIAGONAL_ENTRIES:
1493   case MAT_SORTED_FULL:
1494     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1495     break;
1496   case MAT_IGNORE_OFF_PROC_ENTRIES:
1497     a->donotstash = flg;
1498     break;
1499   case MAT_USE_HASH_TABLE:
1500     a->ht_flag = flg;
1501     break;
1502   case MAT_HERMITIAN:
1503     MatCheckPreallocated(A,1);
1504     PetscCall(MatSetOption(a->A,op,flg));
1505 #if defined(PETSC_USE_COMPLEX)
1506     if (flg) { /* need different mat-vec ops */
1507       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1508       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1509       A->ops->multtranspose    = NULL;
1510       A->ops->multtransposeadd = NULL;
1511       A->symmetric = PETSC_BOOL3_FALSE;
1512     }
1513 #endif
1514     break;
1515   case MAT_SPD:
1516   case MAT_SYMMETRIC:
1517     MatCheckPreallocated(A,1);
1518     PetscCall(MatSetOption(a->A,op,flg));
1519 #if defined(PETSC_USE_COMPLEX)
1520     if (flg) { /* restore to use default mat-vec ops */
1521       A->ops->mult             = MatMult_MPISBAIJ;
1522       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1523       A->ops->multtranspose    = MatMult_MPISBAIJ;
1524       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1525     }
1526 #endif
1527     break;
1528   case MAT_STRUCTURALLY_SYMMETRIC:
1529     MatCheckPreallocated(A,1);
1530     PetscCall(MatSetOption(a->A,op,flg));
1531     break;
1532   case MAT_SYMMETRY_ETERNAL:
1533   case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1534     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1535     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1536     break;
1537   case MAT_SPD_ETERNAL:
1538     break;
1539   case MAT_IGNORE_LOWER_TRIANGULAR:
1540     aA->ignore_ltriangular = flg;
1541     break;
1542   case MAT_ERROR_LOWER_TRIANGULAR:
1543     aA->ignore_ltriangular = flg;
1544     break;
1545   case MAT_GETROW_UPPERTRIANGULAR:
1546     aA->getrow_utriangular = flg;
1547     break;
1548   default:
1549     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",(int)op);
1550   }
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1555 {
1556   PetscFunctionBegin;
1557   if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*B));
1558   if (reuse == MAT_INITIAL_MATRIX) {
1559     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1560   } else if (reuse == MAT_REUSE_MATRIX) {
1561     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1562   }
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1567 {
1568   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1569   Mat            a     = baij->A, b=baij->B;
1570   PetscInt       nv,m,n;
1571   PetscBool      flg;
1572 
1573   PetscFunctionBegin;
1574   if (ll != rr) {
1575     PetscCall(VecEqual(ll,rr,&flg));
1576     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
1577   }
1578   if (!ll) PetscFunctionReturn(0);
1579 
1580   PetscCall(MatGetLocalSize(mat,&m,&n));
1581   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n);
1582 
1583   PetscCall(VecGetLocalSize(rr,&nv));
1584   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1585 
1586   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1587 
1588   /* left diagonalscale the off-diagonal part */
1589   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1590 
1591   /* scale the diagonal part */
1592   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1593 
1594   /* right diagonalscale the off-diagonal part */
1595   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1596   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1601 {
1602   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1603 
1604   PetscFunctionBegin;
1605   PetscCall(MatSetUnfactored(a->A));
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1610 
1611 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1612 {
1613   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1614   Mat            a,b,c,d;
1615   PetscBool      flg;
1616 
1617   PetscFunctionBegin;
1618   a = matA->A; b = matA->B;
1619   c = matB->A; d = matB->B;
1620 
1621   PetscCall(MatEqual(a,c,&flg));
1622   if (flg) {
1623     PetscCall(MatEqual(b,d,&flg));
1624   }
1625   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1626   PetscFunctionReturn(0);
1627 }
1628 
1629 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1630 {
1631   PetscBool      isbaij;
1632 
1633   PetscFunctionBegin;
1634   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1635   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1636   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1637   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1638     PetscCall(MatGetRowUpperTriangular(A));
1639     PetscCall(MatCopy_Basic(A,B,str));
1640     PetscCall(MatRestoreRowUpperTriangular(A));
1641   } else {
1642     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1643     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1644 
1645     PetscCall(MatCopy(a->A,b->A,str));
1646     PetscCall(MatCopy(a->B,b->B,str));
1647   }
1648   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1653 {
1654   PetscFunctionBegin;
1655   PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1656   PetscFunctionReturn(0);
1657 }
1658 
1659 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1660 {
1661   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1662   PetscBLASInt   bnz,one=1;
1663   Mat_SeqSBAIJ   *xa,*ya;
1664   Mat_SeqBAIJ    *xb,*yb;
1665 
1666   PetscFunctionBegin;
1667   if (str == SAME_NONZERO_PATTERN) {
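    /* identical nonzero patterns: the stored value arrays of the diagonal (SeqSBAIJ) and
       off-diagonal (SeqBAIJ) blocks line up entry for entry, so Y += a*X reduces to one
       BLAS axpy on each of the two value arrays, with no searching of column indices */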
1668     PetscScalar alpha = a;
1669     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1670     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1671     PetscCall(PetscBLASIntCast(xa->nz,&bnz));
1672     PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1673     xb   = (Mat_SeqBAIJ*)xx->B->data;
1674     yb   = (Mat_SeqBAIJ*)yy->B->data;
1675     PetscCall(PetscBLASIntCast(xb->nz,&bnz));
1676     PetscCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1677     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1678   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1679     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
1680     PetscCall(MatAXPY_Basic(Y,a,X,str));
1681     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
1682   } else {
1683     Mat      B;
1684     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1685     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1686     PetscCall(MatGetRowUpperTriangular(X));
1687     PetscCall(MatGetRowUpperTriangular(Y));
1688     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1689     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1690     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1691     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1692     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1693     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1694     PetscCall(MatSetType(B,MATMPISBAIJ));
1695     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1696     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1697     PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1698     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1699     PetscCall(MatHeaderMerge(Y,&B));
1700     PetscCall(PetscFree(nnz_d));
1701     PetscCall(PetscFree(nnz_o));
1702     PetscCall(MatRestoreRowUpperTriangular(X));
1703     PetscCall(MatRestoreRowUpperTriangular(Y));
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1709 {
1710   PetscInt       i;
1711   PetscBool      flg;
1712 
1713   PetscFunctionBegin;
1714   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1715   for (i=0; i<n; i++) {
1716     PetscCall(ISEqual(irow[i],icol[i],&flg));
1717     if (!flg) {
1718       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1719     }
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1725 {
1726   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1727   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1728 
1729   PetscFunctionBegin;
1730   if (!Y->preallocated) {
1731     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1732   } else if (!aij->nz) {
1733     PetscInt nonew = aij->nonew;
1734     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1735     aij->nonew = nonew;
1736   }
1737   PetscCall(MatShift_Basic(Y,a));
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1742 {
1743   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1744 
1745   PetscFunctionBegin;
1746   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1747   PetscCall(MatMissingDiagonal(a->A,missing,d));
1748   if (d) {
1749     PetscInt rstart;
1750     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1751     *d += rstart/A->rmap->bs;
1752 
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1758 {
1759   PetscFunctionBegin;
1760   *a = ((Mat_MPISBAIJ*)A->data)->A;
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 /* -------------------------------------------------------------------*/
1765 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1766                                        MatGetRow_MPISBAIJ,
1767                                        MatRestoreRow_MPISBAIJ,
1768                                        MatMult_MPISBAIJ,
1769                                /*  4*/ MatMultAdd_MPISBAIJ,
1770                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1771                                        MatMultAdd_MPISBAIJ,
1772                                        NULL,
1773                                        NULL,
1774                                        NULL,
1775                                /* 10*/ NULL,
1776                                        NULL,
1777                                        NULL,
1778                                        MatSOR_MPISBAIJ,
1779                                        MatTranspose_MPISBAIJ,
1780                                /* 15*/ MatGetInfo_MPISBAIJ,
1781                                        MatEqual_MPISBAIJ,
1782                                        MatGetDiagonal_MPISBAIJ,
1783                                        MatDiagonalScale_MPISBAIJ,
1784                                        MatNorm_MPISBAIJ,
1785                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1786                                        MatAssemblyEnd_MPISBAIJ,
1787                                        MatSetOption_MPISBAIJ,
1788                                        MatZeroEntries_MPISBAIJ,
1789                                /* 24*/ NULL,
1790                                        NULL,
1791                                        NULL,
1792                                        NULL,
1793                                        NULL,
1794                                /* 29*/ MatSetUp_MPISBAIJ,
1795                                        NULL,
1796                                        NULL,
1797                                        MatGetDiagonalBlock_MPISBAIJ,
1798                                        NULL,
1799                                /* 34*/ MatDuplicate_MPISBAIJ,
1800                                        NULL,
1801                                        NULL,
1802                                        NULL,
1803                                        NULL,
1804                                /* 39*/ MatAXPY_MPISBAIJ,
1805                                        MatCreateSubMatrices_MPISBAIJ,
1806                                        MatIncreaseOverlap_MPISBAIJ,
1807                                        MatGetValues_MPISBAIJ,
1808                                        MatCopy_MPISBAIJ,
1809                                /* 44*/ NULL,
1810                                        MatScale_MPISBAIJ,
1811                                        MatShift_MPISBAIJ,
1812                                        NULL,
1813                                        NULL,
1814                                /* 49*/ NULL,
1815                                        NULL,
1816                                        NULL,
1817                                        NULL,
1818                                        NULL,
1819                                /* 54*/ NULL,
1820                                        NULL,
1821                                        MatSetUnfactored_MPISBAIJ,
1822                                        NULL,
1823                                        MatSetValuesBlocked_MPISBAIJ,
1824                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                /* 64*/ NULL,
1830                                        NULL,
1831                                        NULL,
1832                                        NULL,
1833                                        NULL,
1834                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1835                                        NULL,
1836                                        MatConvert_MPISBAIJ_Basic,
1837                                        NULL,
1838                                        NULL,
1839                                /* 74*/ NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        NULL,
1844                                /* 79*/ NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        MatLoad_MPISBAIJ,
1849                                /* 84*/ NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                /* 89*/ NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                /* 94*/ NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        NULL,
1863                                        NULL,
1864                                /* 99*/ NULL,
1865                                        NULL,
1866                                        NULL,
1867                                        MatConjugate_MPISBAIJ,
1868                                        NULL,
1869                                /*104*/ NULL,
1870                                        MatRealPart_MPISBAIJ,
1871                                        MatImaginaryPart_MPISBAIJ,
1872                                        MatGetRowUpperTriangular_MPISBAIJ,
1873                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1874                                /*109*/ NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        MatMissingDiagonal_MPISBAIJ,
1879                                /*114*/ NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                /*119*/ NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                /*124*/ NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                /*129*/ NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                /*134*/ NULL,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                /*139*/ MatSetBlockSizes_Default,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL,
1908                                        NULL,
1909                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1910                                        NULL,
1911                                        NULL,
1912                                        NULL,
1913                                        NULL,
1914                                        NULL,
1915                                /*150*/ NULL
1916 };
1917 
1918 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1919 {
1920   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1921   PetscInt       i,mbs,Mbs;
1922   PetscMPIInt    size;
1923 
1924   PetscFunctionBegin;
1925   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1926   PetscCall(PetscLayoutSetUp(B->rmap));
1927   PetscCall(PetscLayoutSetUp(B->cmap));
1928   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1929   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1930   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1931 
1932   mbs = B->rmap->n/bs;
1933   Mbs = B->rmap->N/bs;
1934   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1935 
1936   B->rmap->bs = bs;
1937   b->bs2      = bs*bs;
1938   b->mbs      = mbs;
1939   b->Mbs      = Mbs;
1940   b->nbs      = B->cmap->n/bs;
1941   b->Nbs      = B->cmap->N/bs;
1942 
1943   for (i=0; i<=b->size; i++) {
1944     b->rangebs[i] = B->rmap->range[i]/bs;
1945   }
1946   b->rstartbs = B->rmap->rstart/bs;
1947   b->rendbs   = B->rmap->rend/bs;
1948 
1949   b->cstartbs = B->cmap->rstart/bs;
1950   b->cendbs   = B->cmap->rend/bs;
1951 
1952 #if defined(PETSC_USE_CTABLE)
1953   PetscCall(PetscTableDestroy(&b->colmap));
1954 #else
1955   PetscCall(PetscFree(b->colmap));
1956 #endif
1957   PetscCall(PetscFree(b->garray));
1958   PetscCall(VecDestroy(&b->lvec));
1959   PetscCall(VecScatterDestroy(&b->Mvctx));
1960   PetscCall(VecDestroy(&b->slvec0));
1961   PetscCall(VecDestroy(&b->slvec0b));
1962   PetscCall(VecDestroy(&b->slvec1));
1963   PetscCall(VecDestroy(&b->slvec1a));
1964   PetscCall(VecDestroy(&b->slvec1b));
1965   PetscCall(VecScatterDestroy(&b->sMvctx));
1966 
1967   /* Because b->B may have been resized we simply destroy it and create a new one each time */
1968   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1969   PetscCall(MatDestroy(&b->B));
1970   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1971   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1972   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1973   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1974 
1975   if (!B->preallocated) {
1976     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1977     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1978     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1979     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1980     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1981   }
1982 
1983   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1984   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1985 
1986   B->preallocated  = PETSC_TRUE;
1987   B->was_assembled = PETSC_FALSE;
1988   B->assembled     = PETSC_FALSE;
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1993 {
1994   PetscInt       m,rstart,cend;
1995   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1996   const PetscInt *JJ    =NULL;
1997   PetscScalar    *values=NULL;
1998   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1999   PetscBool      nooffprocentries;
2000 
2001   PetscFunctionBegin;
2002   PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
2003   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
2004   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
2005   PetscCall(PetscLayoutSetUp(B->rmap));
2006   PetscCall(PetscLayoutSetUp(B->cmap));
2007   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2008   m      = B->rmap->n/bs;
2009   rstart = B->rmap->rstart/bs;
2010   cend   = B->cmap->rend/bs;
2011 
2012   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2013   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2014   for (i=0; i<m; i++) {
2015     nz = ii[i+1] - ii[i];
2016     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2017     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2018     JJ     = jj + ii[i];
2019     bd     = 0;
2020     for (j=0; j<nz; j++) {
2021       if (*JJ >= i + rstart) break;
2022       JJ++;
2023       bd++;
2024     }
2025     d  = 0;
2026     for (; j<nz; j++) {
2027       if (*JJ++ >= cend) break;
2028       d++;
2029     }
2030     d_nnz[i] = d;
2031     o_nnz[i] = nz - d - bd;
2032     nz       = nz - bd;
2033     nz_max = PetscMax(nz_max,nz);
2034   }
2035   PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2036   PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2037   PetscCall(PetscFree2(d_nnz,o_nnz));
2038 
2039   values = (PetscScalar*)V;
2040   if (!values) {
2041     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2042   }
2043   for (i=0; i<m; i++) {
2044     PetscInt          row    = i + rstart;
2045     PetscInt          ncols  = ii[i+1] - ii[i];
2046     const PetscInt    *icols = jj + ii[i];
2047     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2048       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2049       PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2050     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2051       PetscInt j;
2052       for (j=0; j<ncols; j++) {
2053         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2054         PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2055       }
2056     }
2057   }
2058 
2059   if (!V) PetscCall(PetscFree(values));
2060   nooffprocentries    = B->nooffprocentries;
2061   B->nooffprocentries = PETSC_TRUE;
2062   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2063   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2064   B->nooffprocentries = nooffprocentries;
2065 
2066   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2067   PetscFunctionReturn(0);
2068 }
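
/*
   A minimal sketch (illustrative values only, not part of the PETSc sources) of the block-CSR
   input expected by MatMPISBAIJSetPreallocationCSR(): suppose a process owns global block rows
   0 and 1, block row 0 has nonzero block columns {0,2} and block row 1 has {1}; then

     PetscInt    ii[] = {0,2,3};    block-row pointers into jj
     PetscInt    jj[] = {0,2,1};    global block-column indices, upper triangle only
     PetscScalar v[3*bs*bs];        optional values, one bs x bs block per entry of jj (may be NULL)
     PetscCall(MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,v));

   Entries below the block diagonal are not preallocated and, since MAT_IGNORE_LOWER_TRIANGULAR is
   set above, are ignored during insertion.
*/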
2069 
2070 /*MC
2071    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2072    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2073    the matrix is stored.
2074 
2075    For complex numbers, by default this matrix is symmetric, NOT Hermitian. To make it Hermitian you
2076    can call MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE).
2077 
2078    Options Database Keys:
2079 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2080 
2081    Notes:
2082      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2083      diagonal portion of the matrix on each process must be less than or equal to the number of columns.
2084 
2085    Level: beginner
2086 
2087 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2088 M*/
2089 
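/*
   A small usage sketch for this type (illustrative only; n and the inserted values are assumed to
   come from the caller): create an MPISBAIJ matrix and set only entries on or above the diagonal,
   since only the upper triangular part of the symmetric matrix is stored.

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n));
     PetscCall(MatSetType(A,MATMPISBAIJ));
     PetscCall(MatMPISBAIJSetPreallocation(A,1,3,NULL,2,NULL));
     ... MatSetValues() with column index >= row index ...
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/
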
2090 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2091 {
2092   Mat_MPISBAIJ   *b;
2093   PetscBool      flg = PETSC_FALSE;
2094 
2095   PetscFunctionBegin;
2096   PetscCall(PetscNewLog(B,&b));
2097   B->data = (void*)b;
2098   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2099 
2100   B->ops->destroy = MatDestroy_MPISBAIJ;
2101   B->ops->view    = MatView_MPISBAIJ;
2102   B->assembled    = PETSC_FALSE;
2103   B->insertmode   = NOT_SET_VALUES;
2104 
2105   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2106   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2107 
2108   /* build local table of row and column ownerships */
2109   PetscCall(PetscMalloc1(b->size+2,&b->rangebs));
2110 
2111   /* build cache for off array entries formed */
2112   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2113 
2114   b->donotstash  = PETSC_FALSE;
2115   b->colmap      = NULL;
2116   b->garray      = NULL;
2117   b->roworiented = PETSC_TRUE;
2118 
2119   /* stuff used in block assembly */
2120   b->barray = NULL;
2121 
2122   /* stuff used for matrix vector multiply */
2123   b->lvec    = NULL;
2124   b->Mvctx   = NULL;
2125   b->slvec0  = NULL;
2126   b->slvec0b = NULL;
2127   b->slvec1  = NULL;
2128   b->slvec1a = NULL;
2129   b->slvec1b = NULL;
2130   b->sMvctx  = NULL;
2131 
2132   /* stuff for MatGetRow() */
2133   b->rowindices   = NULL;
2134   b->rowvalues    = NULL;
2135   b->getrowactive = PETSC_FALSE;
2136 
2137   /* hash table stuff */
2138   b->ht           = NULL;
2139   b->hd           = NULL;
2140   b->ht_size      = 0;
2141   b->ht_flag      = PETSC_FALSE;
2142   b->ht_fact      = 0;
2143   b->ht_total_ct  = 0;
2144   b->ht_insert_ct = 0;
2145 
2146   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2147   b->ijonly = PETSC_FALSE;
2148 
2149   b->in_loc = NULL;
2150   b->v_loc  = NULL;
2151   b->n_loc  = 0;
2152 
2153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
2155   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
2156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2157 #if defined(PETSC_HAVE_ELEMENTAL)
2158   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
2159 #endif
2160 #if defined(PETSC_HAVE_SCALAPACK)
2161   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
2162 #endif
2163   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
2164   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));
2165 
2166   B->symmetric                   = PETSC_BOOL3_TRUE;
2167   B->structurally_symmetric      = PETSC_BOOL3_TRUE;
2168   B->symmetry_eternal            = PETSC_TRUE;
2169   B->structural_symmetry_eternal = PETSC_TRUE;
2170 #if defined(PETSC_USE_COMPLEX)
2171   B->hermitian                   = PETSC_BOOL3_FALSE;
2172 #else
2173   B->hermitian                   = PETSC_BOOL3_TRUE;
2174 #endif
2175 
2176   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
2177   PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
2178   PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
2179   if (flg) {
2180     PetscReal fact = 1.39;
2181     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2182     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2183     if (fact <= 1.0) fact = 1.39;
2184     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2185     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2186   }
2187   PetscOptionsEnd();
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 /*MC
2192    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2193 
2194    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2195    and MATMPISBAIJ otherwise.
2196 
2197    Options Database Keys:
2198 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2199 
2200   Level: beginner
2201 
2202 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2203 M*/
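
/*
   A short sketch (illustrative only; comm, A and n are assumed to be declared by the caller):
   selecting the sbaij format at run time and letting PETSc pick the sequential or parallel
   implementation from the communicator size.

     PetscCall(MatCreate(comm,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n));
     PetscCall(MatSetType(A,MATSBAIJ));    becomes MATSEQSBAIJ on one rank, MATMPISBAIJ otherwise
     PetscCall(MatSetFromOptions(A));      or select it with -mat_type sbaij
     PetscCall(MatSetUp(A));
*/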
2204 
2205 /*@C
2206    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2207    the user should preallocate the matrix storage by setting the parameters
2208    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2209    performance can be increased by more than a factor of 50.
2210 
2211    Collective on Mat
2212 
2213    Input Parameters:
2214 +  B - the matrix
2215 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2216           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2217 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2218            submatrix  (same for all local rows)
2219 .  d_nnz - array containing the number of block nonzeros in the various block rows
2220            in the upper triangular and diagonal part of the diagonal portion of the local
2221            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2222            for the diagonal entry and set a value even if it is zero.
2223 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2224            submatrix (same for all local rows).
2225 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2226            off-diagonal portion of the local submatrix that is right of the diagonal
2227            (possibly different for each block row) or NULL.
2228 
2229    Options Database Keys:
2230 +   -mat_no_unroll - uses code that does not unroll the loops in the
2231                      block calculations (much slower)
2232 -   -mat_block_size - size of the blocks to use
2233 
2234    Notes:
2235 
2236    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2237    then it must be used on all processors that share the object for that argument.
2238 
2239    If the *_nnz parameter is given then the *_nz parameter is ignored
2240 
2241    Storage Information:
2242    For a square global matrix we define each processor's diagonal portion
2243    to be its local rows and the corresponding columns (a square submatrix);
2244    each processor's off-diagonal portion encompasses the remainder of the
2245    local matrix (a rectangular submatrix).
2246 
2247    The user can specify preallocated storage for the diagonal part of
2248    the local submatrix with either d_nz or d_nnz (not both).  Set
2249    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2250    memory allocation.  Likewise, specify preallocated storage for the
2251    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2252 
2253    You can call MatGetInfo() to get information on how effective the preallocation was;
2254    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2255    You can also run with the option -info and look for messages with the string
2256    malloc in them to see if additional memory allocation was needed.
2257 
2258    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2259    the figure below we depict these three local rows and all columns (0-11).
2260 
2261 .vb
2262            0 1 2 3 4 5 6 7 8 9 10 11
2263           --------------------------
2264    row 3  |. . . d d d o o o o  o  o
2265    row 4  |. . . d d d o o o o  o  o
2266    row 5  |. . . d d d o o o o  o  o
2267           --------------------------
2268 .ve
2269 
2270    Thus, any entries in the d locations are stored in the d (diagonal)
2271    submatrix, and any entries in the o locations are stored in the
2272    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2273    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2274 
2275    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2276    plus the diagonal part of the d matrix,
2277    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2278 
2279    In general, for PDE problems in which most nonzeros are near the diagonal,
2280    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2281    or you will get TERRIBLE performance; see the users' manual chapter on
2282    matrices.
2283 
2284    Level: intermediate
2285 
2286 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2287 @*/
2288 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2289 {
2290   PetscFunctionBegin;
2291   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2292   PetscValidType(B,1);
2293   PetscValidLogicalCollectiveInt(B,bs,2);
2294   PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2295   PetscFunctionReturn(0);
2296 }
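
/*
   A preallocation sketch (illustrative numbers only): a bs=2 matrix whose local block rows are
   expected to hold at most 3 blocks in the diagonal portion (upper triangle plus diagonal) and
   1 block in the off-diagonal portion; m is the local dimension and must be divisible by bs.

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE));
     PetscCall(MatSetType(A,MATMPISBAIJ));
     PetscCall(MatMPISBAIJSetPreallocation(A,2,3,NULL,1,NULL));

   Exact per-block-row counts can be given instead through the d_nnz and o_nnz arrays.
*/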
2297 
2298 /*@C
2299    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2300    (block compressed row).  For good matrix assembly performance
2301    the user should preallocate the matrix storage by setting the parameters
2302    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2303    performance can be increased by more than a factor of 50.
2304 
2305    Collective
2306 
2307    Input Parameters:
2308 +  comm - MPI communicator
2309 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2310           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2311 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2312            This value should be the same as the local size used in creating the
2313            y vector for the matrix-vector product y = Ax.
2314 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2315            This value should be the same as the local size used in creating the
2316            x vector for the matrix-vector product y = Ax.
2317 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2318 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2319 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2320            submatrix  (same for all local rows)
2321 .  d_nnz - array containing the number of block nonzeros in the various block rows
2322            in the upper triangular portion of the diagonal portion of the local submatrix
2323            (possibly different for each block row) or NULL.
2324            If you plan to factor the matrix you must leave room for the diagonal entry and
2325            set its value even if it is zero.
2326 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2327            submatrix (same for all local rows).
2328 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2329            off-diagonal portion of the local submatrix (possibly different for
2330            each block row) or NULL.
2331 
2332    Output Parameter:
2333 .  A - the matrix
2334 
2335    Options Database Keys:
2336 +   -mat_no_unroll - uses code that does not unroll the loops in the
2337                      block calculations (much slower)
2338 .   -mat_block_size - size of the blocks to use
2339 -   -mat_mpi - use the parallel matrix data structures even on one processor
2340                (defaults to using SeqSBAIJ format on one processor)
2341 
2342    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2343    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2344    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
2345 
2346    Notes:
2347    The number of rows and columns must be divisible by blocksize.
2348    This matrix type does not support complex Hermitian operations.
2349 
2350    The user MUST specify either the local or global matrix dimensions
2351    (possibly both).
2352 
2353    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2354    then it must be used on all processors that share the object for that argument.
2355 
2356    If the *_nnz parameter is given then the *_nz parameter is ignored
2357 
2358    Storage Information:
2359    For a square global matrix we define each processor's diagonal portion
2360    to be its local rows and the corresponding columns (a square submatrix);
2361    each processor's off-diagonal portion encompasses the remainder of the
2362    local matrix (a rectangular submatrix).
2363 
2364    The user can specify preallocated storage for the diagonal part of
2365    the local submatrix with either d_nz or d_nnz (not both).  Set
2366    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2367    memory allocation.  Likewise, specify preallocated storage for the
2368    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2369 
2370    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2371    the figure below we depict these three local rows and all columns (0-11).
2372 
2373 .vb
2374            0 1 2 3 4 5 6 7 8 9 10 11
2375           --------------------------
2376    row 3  |. . . d d d o o o o  o  o
2377    row 4  |. . . d d d o o o o  o  o
2378    row 5  |. . . d d d o o o o  o  o
2379           --------------------------
2380 .ve
2381 
2382    Thus, any entries in the d locations are stored in the d (diagonal)
2383    submatrix, and any entries in the o locations are stored in the
2384    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2385    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2386 
2387    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2388    plus the diagonal part of the d matrix,
2389    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2390    In general, for PDE problems in which most nonzeros are near the diagonal,
2391    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2392    or you will get TERRIBLE performance; see the users' manual chapter on
2393    matrices.
2394 
2395    Level: intermediate
2396 
2397 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2398 @*/
2399 
2400 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2401 {
2402   PetscMPIInt    size;
2403 
2404   PetscFunctionBegin;
2405   PetscCall(MatCreate(comm,A));
2406   PetscCall(MatSetSizes(*A,m,n,M,N));
2407   PetscCallMPI(MPI_Comm_size(comm,&size));
2408   if (size > 1) {
2409     PetscCall(MatSetType(*A,MATMPISBAIJ));
2410     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2411   } else {
2412     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2413     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2414   }
2415   PetscFunctionReturn(0);
2416 }
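
/*
   A direct-creation sketch (illustrative only; bs, m, d_nz and o_nz are assumed to be set by the
   caller, with m a multiple of bs). As noted above, the MatCreate()/MatSetType()/
   MatXXXXSetPreallocation() sequence is usually preferred.

     Mat A;
     PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,NULL,o_nz,NULL,&A));
*/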
2417 
2418 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2419 {
2420   Mat            mat;
2421   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2422   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2423   PetscScalar    *array;
2424 
2425   PetscFunctionBegin;
2426   *newmat = NULL;
2427 
2428   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2429   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2430   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2431   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2432   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2433 
2434   mat->factortype   = matin->factortype;
2435   mat->preallocated = PETSC_TRUE;
2436   mat->assembled    = PETSC_TRUE;
2437   mat->insertmode   = NOT_SET_VALUES;
2438 
2439   a      = (Mat_MPISBAIJ*)mat->data;
2440   a->bs2 = oldmat->bs2;
2441   a->mbs = oldmat->mbs;
2442   a->nbs = oldmat->nbs;
2443   a->Mbs = oldmat->Mbs;
2444   a->Nbs = oldmat->Nbs;
2445 
2446   a->size         = oldmat->size;
2447   a->rank         = oldmat->rank;
2448   a->donotstash   = oldmat->donotstash;
2449   a->roworiented  = oldmat->roworiented;
2450   a->rowindices   = NULL;
2451   a->rowvalues    = NULL;
2452   a->getrowactive = PETSC_FALSE;
2453   a->barray       = NULL;
2454   a->rstartbs     = oldmat->rstartbs;
2455   a->rendbs       = oldmat->rendbs;
2456   a->cstartbs     = oldmat->cstartbs;
2457   a->cendbs       = oldmat->cendbs;
2458 
2459   /* hash table stuff */
2460   a->ht           = NULL;
2461   a->hd           = NULL;
2462   a->ht_size      = 0;
2463   a->ht_flag      = oldmat->ht_flag;
2464   a->ht_fact      = oldmat->ht_fact;
2465   a->ht_total_ct  = 0;
2466   a->ht_insert_ct = 0;
2467 
2468   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
2469   if (oldmat->colmap) {
2470 #if defined(PETSC_USE_CTABLE)
2471     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2472 #else
2473     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
2474     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
2475     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
2476 #endif
2477   } else a->colmap = NULL;
2478 
2479   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2480     PetscCall(PetscMalloc1(len,&a->garray));
2481     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2482     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2483   } else a->garray = NULL;
2484 
2485   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
2486   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2487   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2488   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2489   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2490 
2491   PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
2492   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2493   PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
2494   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2495 
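  /* slvec1a, slvec1b and slvec0b are not independent vectors: they are created below as views into
     the arrays of slvec1 and slvec0, the first bs*mbs entries holding the local part and the
     remaining nt-bs*mbs entries the ghost part */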
2496   PetscCall(VecGetLocalSize(a->slvec1,&nt));
2497   PetscCall(VecGetArray(a->slvec1,&array));
2498   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
2499   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
2500   PetscCall(VecRestoreArray(a->slvec1,&array));
2501   PetscCall(VecGetArray(a->slvec0,&array));
2502   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
2503   PetscCall(VecRestoreArray(a->slvec0,&array));
2504   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2505   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2506   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
2507   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
2508   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));
2509 
2510   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead we simply take an extra reference to the old scatter: */
2511   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2512   a->sMvctx = oldmat->sMvctx;
2513   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));
2514 
2515   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2516   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2517   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2518   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2519   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2520   *newmat = mat;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2525 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2526 
2527 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2528 {
2529   PetscBool      isbinary;
2530 
2531   PetscFunctionBegin;
2532   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2533   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2534   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 /*XXXXX@
2539    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2540 
2541    Input Parameters:
2542 .  mat  - the matrix
2543 .  fact - factor
2544 
2545    Not Collective on Mat, each process can have a different hash factor
2546 
2547    Level: advanced
2548 
2549   Notes:
2550    This can also be set by the command line option: -mat_use_hash_table fact
2551 
2552 .seealso: `MatSetOption()`
2553 @XXXXX*/
2554 
2555 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2556 {
2557   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2558   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2559   PetscReal      atmp;
2560   PetscReal      *work,*svalues,*rvalues;
2561   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2562   PetscMPIInt    rank,size;
2563   PetscInt       *rowners_bs,dest,count,source;
2564   PetscScalar    *va;
2565   MatScalar      *ba;
2566   MPI_Status     stat;
2567 
2568   PetscFunctionBegin;
2569   PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning the idx array is not supported for MATMPISBAIJ; send email to petsc-maint@mcs.anl.gov");
2570   PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
2571   PetscCall(VecGetArray(v,&va));
2572 
2573   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2574   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2575 
2576   bs  = A->rmap->bs;
2577   mbs = a->mbs;
2578   Mbs = a->Mbs;
2579   ba  = b->a;
2580   bi  = b->i;
2581   bj  = b->j;
2582 
2583   /* find ownerships */
2584   rowners_bs = A->rmap->range;
2585 
2586   /* each proc creates an array to be distributed */
2587   PetscCall(PetscCalloc1(bs*Mbs,&work));
2588 
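  /* Only the upper triangle is stored, so each stored block of B contributes both to rows owned by
     this process (accumulated directly into va) and to rows owned by higher-rank processes
     (accumulated by column into work and sent forward below); each process then folds in the
     contributions received from lower ranks */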
2589   /* row_max for B */
2590   if (rank != size-1) {
2591     for (i=0; i<mbs; i++) {
2592       ncols = bi[1] - bi[0]; bi++;
2593       brow  = bs*i;
2594       for (j=0; j<ncols; j++) {
2595         bcol = bs*(*bj);
2596         for (kcol=0; kcol<bs; kcol++) {
2597           col  = bcol + kcol;                /* local col index */
2598           col += rowners_bs[rank+1];      /* global col index */
2599           for (krow=0; krow<bs; krow++) {
2600             atmp = PetscAbsScalar(*ba); ba++;
2601             row  = brow + krow;   /* local row index */
2602             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2603             if (work[col] < atmp) work[col] = atmp;
2604           }
2605         }
2606         bj++;
2607       }
2608     }
2609 
2610     /* send values to its owners */
2611     for (dest=rank+1; dest<size; dest++) {
2612       svalues = work + rowners_bs[dest];
2613       count   = rowners_bs[dest+1]-rowners_bs[dest];
2614       PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
2615     }
2616   }
2617 
2618   /* receive values */
2619   if (rank) {
2620     rvalues = work;
2621     count   = rowners_bs[rank+1]-rowners_bs[rank];
2622     for (source=0; source<rank; source++) {
2623       PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
2624       /* process values */
2625       for (i=0; i<count; i++) {
2626         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2627       }
2628     }
2629   }
2630 
2631   PetscCall(VecRestoreArray(v,&va));
2632   PetscCall(PetscFree(work));
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2637 {
2638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2639   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2640   PetscScalar       *x,*ptr,*from;
2641   Vec               bb1;
2642   const PetscScalar *b;
2643 
2644   PetscFunctionBegin;
2645   PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2646   PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2647 
2648   if (flag == SOR_APPLY_UPPER) {
2649     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2650     PetscFunctionReturn(0);
2651   }
2652 
2653   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2654     if (flag & SOR_ZERO_INITIAL_GUESS) {
2655       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
2656       its--;
2657     }
2658 
2659     PetscCall(VecDuplicate(bb,&bb1));
2660     while (its--) {
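      /* each iteration forms bb1 = bb - (off-diagonal part of A)*xx: the lower-triangular
         contribution is computed as B^T*xx and routed to the owning rows through the sMvctx
         scatter, the upper-triangular contribution is applied locally as B times the gathered
         ghost values of xx, and the diagonal block is then relaxed with a local symmetric sweep */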
2661 
2662       /* lower triangular part: slvec0b = B^T*xx (negated below, together with slvec0a, by VecScale) */
2663       PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2664 
2665       /* copy xx into slvec0a */
2666       PetscCall(VecGetArray(mat->slvec0,&ptr));
2667       PetscCall(VecGetArray(xx,&x));
2668       PetscCall(PetscArraycpy(ptr,x,bs*mbs));
2669       PetscCall(VecRestoreArray(mat->slvec0,&ptr));
2670 
2671       PetscCall(VecScale(mat->slvec0,-1.0));
2672 
2673       /* copy bb into slvec1a */
2674       PetscCall(VecGetArray(mat->slvec1,&ptr));
2675       PetscCall(VecGetArrayRead(bb,&b));
2676       PetscCall(PetscArraycpy(ptr,b,bs*mbs));
2677       PetscCall(VecRestoreArray(mat->slvec1,&ptr));
2678 
2679       /* set slvec1b = 0 */
2680       PetscCall(VecSet(mat->slvec1b,0.0));
2681 
2682       PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2683       PetscCall(VecRestoreArray(xx,&x));
2684       PetscCall(VecRestoreArrayRead(bb,&b));
2685       PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2686 
2687       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2688       PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));
2689 
2690       /* local diagonal sweep */
2691       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
2692     }
2693     PetscCall(VecDestroy(&bb1));
2694   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2695     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2696   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2697     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2698   } else if (flag & SOR_EISENSTAT) {
2699     Vec               xx1;
2700     PetscBool         hasop;
2701     const PetscScalar *diag;
2702     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2703     PetscInt          i,n;
2704 
2705     if (!mat->xx1) {
2706       PetscCall(VecDuplicate(bb,&mat->xx1));
2707       PetscCall(VecDuplicate(bb,&mat->bb1));
2708     }
2709     xx1 = mat->xx1;
2710     bb1 = mat->bb1;
2711 
2712     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
2713 
2714     if (!mat->diag) {
2715       /* this cached diagonal is wrong if the same matrix is reused with new nonzero values */
2716       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
2717       PetscCall(MatGetDiagonal(matin,mat->diag));
2718     }
2719     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
2720 
2721     if (hasop) {
2722       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
2723       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2724     } else {
2725       /*
2726           These two lines are replaced by code that may be a bit faster for a good compiler
2727       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2728       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2729       */
2730       PetscCall(VecGetArray(mat->slvec1a,&sl));
2731       PetscCall(VecGetArrayRead(mat->diag,&diag));
2732       PetscCall(VecGetArrayRead(bb,&b));
2733       PetscCall(VecGetArray(xx,&x));
2734       PetscCall(VecGetLocalSize(xx,&n));
2735       if (omega == 1.0) {
2736         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2737         PetscCall(PetscLogFlops(2.0*n));
2738       } else {
2739         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2740         PetscCall(PetscLogFlops(3.0*n));
2741       }
2742       PetscCall(VecRestoreArray(mat->slvec1a,&sl));
2743       PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2744       PetscCall(VecRestoreArrayRead(bb,&b));
2745       PetscCall(VecRestoreArray(xx,&x));
2746     }
2747 
2748     /* multiply off-diagonal portion of matrix */
2749     PetscCall(VecSet(mat->slvec1b,0.0));
2750     PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2751     PetscCall(VecGetArray(mat->slvec0,&from));
2752     PetscCall(VecGetArray(xx,&x));
2753     PetscCall(PetscArraycpy(from,x,bs*mbs));
2754     PetscCall(VecRestoreArray(mat->slvec0,&from));
2755     PetscCall(VecRestoreArray(xx,&x));
2756     PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2757     PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2758     PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2759 
2760     /* local sweep */
2761     PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2762     PetscCall(VecAXPY(xx,1.0,xx1));
2763   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2764   PetscFunctionReturn(0);
2765 }
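
/*
   Illustrative sketch (not part of this file): MatSOR_MPISBAIJ() is normally reached through the
   PCSOR preconditioner. Only the sweep types handled above (local symmetric; local forward or
   backward with a zero initial guess and its == 1; Eisenstat; apply-upper) are supported, and only
   for block size 1. The guard macro and function name below are hypothetical.
*/
#if defined(ILLUSTRATIVE_EXAMPLE_ONLY) /* hypothetical guard; example only */
#include <petscksp.h> /* KSP/PC interfaces used by this example */
static PetscErrorCode ConfigureLocalSSOR(KSP ksp, PetscReal omega)
{
  PC pc;

  PetscFunctionBegin;
  PetscCall(KSPGetPC(ksp,&pc));
  PetscCall(PCSetType(pc,PCSOR));
  /* select the processor-local symmetric sweep handled by the first branch above */
  PetscCall(PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP));
  PetscCall(PCSORSetOmega(pc,omega));
  PetscFunctionReturn(0);
}
#endif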
2766 
2767 /*@
2768      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2769          in standard CSR format.
2770 
2771    Collective
2772 
2773    Input Parameters:
2774 +  comm - MPI communicator
2775 .  bs - the block size (only a block size of 1 is supported)
2776 .  m - number of local rows (cannot be PETSC_DECIDE)
2777 .  n - number of local columns; this should be the same as the local size used in creating the
2778        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2779        calculated if N is given). For square matrices n is almost always m.
2780 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2781 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2782 .  i - row indices; that is i[0] = 0 and i[row] = i[row-1] + number of block elements in that block row of the matrix
2783 .  j - column indices
2784 -  a - matrix values
2785 
2786    Output Parameter:
2787 .   mat - the matrix
2788 
2789    Level: intermediate
2790 
2791    Notes:
2792        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2793      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2794      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2795 
2796        The i and j indices are 0 based, and the i indices are offsets into the local j array.
2797 
2798 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2799           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2800 @*/
2801 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2802 {
2803   PetscFunctionBegin;
2804   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2805   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2806   PetscCall(MatCreate(comm,mat));
2807   PetscCall(MatSetSizes(*mat,m,n,M,N));
2808   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2809   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2810   PetscFunctionReturn(0);
2811 }
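
/*
   Illustrative sketch (hypothetical values, run on a single MPI rank for simplicity): the upper
   triangle of the 2x2 symmetric matrix [2 -1; -1 2] in CSR form, passed to the routine above with
   block size 1. The guard macro and function name are hypothetical.
*/
#if defined(ILLUSTRATIVE_EXAMPLE_ONLY) /* hypothetical guard; example only */
static PetscErrorCode CreateSmallSBAIJ(Mat *mat)
{
  const PetscInt    i[] = {0,2,3};          /* row 0 has 2 stored entries, row 1 has 1 */
  const PetscInt    j[] = {0,1,1};          /* column indices of the stored (upper-triangular) entries */
  const PetscScalar a[] = {2.0,-1.0,2.0};   /* values corresponding to j[] */

  PetscFunctionBegin;
  PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,mat));
  PetscFunctionReturn(0);
}
#endif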
2812 
2813 /*@C
2814    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2815 
2816    Collective
2817 
2818    Input Parameters:
2819 +  B - the matrix
2820 .  bs - the block size
2821 .  i - the indices into j for the start of each local row (starts with zero)
2822 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2823 -  v - optional values in the matrix
2824 
2825    Level: advanced
2826 
2827    Notes:
2828    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2829    and usually the numerical values as well.
2830 
2831    Any entries below the diagonal are ignored.
2832 
2833 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2834 @*/
2835 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2836 {
2837   PetscFunctionBegin;
2838   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2839   PetscFunctionReturn(0);
2840 }
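
/*
   Illustrative sketch (hypothetical values): use the CSR structure only (v = NULL) to preallocate,
   then insert the numerical values separately. The guard macro and function name are hypothetical.
*/
#if defined(ILLUSTRATIVE_EXAMPLE_ONLY) /* hypothetical guard; example only */
static PetscErrorCode PreallocateFromCSR(MPI_Comm comm, Mat *B)
{
  const PetscInt i[] = {0,2,3};   /* row offsets into j (upper triangle only) */
  const PetscInt j[] = {0,1,1};   /* column indices, sorted within each row */

  PetscFunctionBegin;
  PetscCall(MatCreate(comm,B));
  PetscCall(MatSetSizes(*B,2,2,PETSC_DETERMINE,PETSC_DETERMINE));
  PetscCall(MatSetType(*B,MATMPISBAIJ));
  PetscCall(MatMPISBAIJSetPreallocationCSR(*B,1,i,j,NULL)); /* nonzero pattern only; values come later */
  /* ... fill with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
  PetscFunctionReturn(0);
}
#endif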
2841 
2842 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2843 {
2844   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2845   PetscInt       *indx;
2846   PetscScalar    *values;
2847 
2848   PetscFunctionBegin;
2849   PetscCall(MatGetSize(inmat,&m,&N));
2850   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2851     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2852     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2853     PetscInt       *bindx,rmax=a->rmax,j;
2854     PetscMPIInt    rank,size;
2855 
2856     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2857     mbs = m/bs; Nbs = N/cbs;
2858     if (n == PETSC_DECIDE) {
2859       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2860     }
2861     nbs = n/cbs;
2862 
2863     PetscCall(PetscMalloc1(rmax,&bindx));
2864     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2865 
2866     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2867     PetscCallMPI(MPI_Comm_size(comm,&size));
2868     if (rank == size-1) {
2869       /* Check sum(nbs) = Nbs */
2870       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2871     }
2872 
2873     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2874     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2875     for (i=0; i<mbs; i++) {
2876       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2877       nnz  = nnz/bs;
2878       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2879       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2880       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2881     }
2882     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2883     PetscCall(PetscFree(bindx));
2884 
2885     PetscCall(MatCreate(comm,outmat));
2886     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2887     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2888     PetscCall(MatSetType(*outmat,MATSBAIJ));
2889     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2890     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2891     MatPreallocateEnd(dnz,onz);
2892   }
2893 
2894   /* numeric phase */
2895   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2896   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2897 
2898   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2899   for (i=0; i<m; i++) {
2900     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2901     Ii   = i + rstart;
2902     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2903     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2904   }
2905   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2906   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2907   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2908   PetscFunctionReturn(0);
2909 }
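
/*
   Illustrative sketch (not part of this file): the routine above implements the MPISBAIJ back end
   of MatCreateMPIMatConcatenateSeqMat(), which stacks each rank's sequential matrix, row-wise,
   into one parallel matrix. The guard macro and function name below are hypothetical.
*/
#if defined(ILLUSTRATIVE_EXAMPLE_ONLY) /* hypothetical guard; example only */
static PetscErrorCode ConcatenateExample(MPI_Comm comm, Mat localsbaij, Mat *global)
{
  PetscFunctionBegin;
  /* each rank contributes the rows of its SeqSBAIJ matrix; the local column size is split by PETSc */
  PetscCall(MatCreateMPIMatConcatenateSeqMat(comm,localsbaij,PETSC_DECIDE,MAT_INITIAL_MATRIX,global));
  PetscFunctionReturn(0);
}
#endif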
2910