xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 76be6f4ff3bd4e251c19fc00ebbebfd58b6e7589)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
/*
  MatPreallocateWithMats_Private - Preallocates B using the union of the
  nonzero patterns of the nm matrices X[], which must all share B's row layout.

  Input Parameters:
+ B    - the matrix being preallocated (a temporary MATPREALLOCATOR is built and applied to it)
. nm   - number of pattern matrices in X
. X    - matrices whose nonzero structure is inserted into the preallocator
. symm - if symm[i] is true, the transposed pattern of X[i] is inserted as well
         (used when X[i] stores only the upper triangle of a symmetric matrix)
- fill - passed to MatPreallocatorPreallocate(); if PETSC_TRUE the values are
         copied into B in addition to setting the preallocation
*/
static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
{
  Mat            preallocator;
  PetscInt       r,rstart,rend;
  PetscInt       bs,i,m,n,M,N;
  PetscBool      cong = PETSC_TRUE;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidLogicalCollectiveInt(B,nm,2);
  /* every pattern matrix must have the same row layout as B */
  for (i = 0; i < nm; i++) {
    PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
    PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
    PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
  }
  PetscValidLogicalCollectiveBool(B,fill,5);
  /* build a preallocator matrix with the same shape/block size as B */
  PetscCall(MatGetBlockSize(B,&bs));
  PetscCall(MatGetSize(B,&M,&N));
  PetscCall(MatGetLocalSize(B,&m,&n));
  PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
  PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
  PetscCall(MatSetBlockSize(preallocator,bs));
  PetscCall(MatSetSizes(preallocator,m,n,M,N));
  PetscCall(MatSetUp(preallocator));
  PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
  /* record the pattern of every X[i], row by row (and its transpose if symm[i]) */
  for (r = rstart; r < rend; ++r) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    for (i = 0; i < nm; i++) {
      PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
      PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
      if (symm && symm[i]) {
        PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
      }
      PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
    }
  }
  PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
  PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
  PetscCall(MatDestroy(&preallocator));
  PetscFunctionReturn(0);
}
60 
/*
  MatConvert_MPISBAIJ_Basic - Fallback conversion from MPISBAIJ (symmetric,
  upper-triangle storage) to an arbitrary matrix type, by expanding each stored
  entry a(i,j) into both (i,j) and (j,i) of the target matrix.

  For MAT_INITIAL_MATRIX/MAT_INPLACE_MATRIX a new matrix is created and (for
  non-dense targets) preallocated from A's symmetrized pattern; for
  MAT_REUSE_MATRIX the existing *newmat is zeroed and refilled.  With complex
  scalars and a Hermitian A, the mirrored entries are conjugated.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,newtype));
    PetscCall(MatGetBlockSize(A,&bs));
    PetscCall(MatSetBlockSize(B,bs));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));
    /* dense targets need no preallocation; others get the symmetrized pattern of A */
    PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
    if (!isdense) {
      PetscCall(MatGetRowUpperTriangular(A));
      PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
      PetscCall(MatRestoreRowUpperTriangular(A));
    } else {
      PetscCall(MatSetUp(B));
    }
  } else {
    B    = *newmat;
    PetscCall(MatZeroEntries(B));
  }

  /* copy each stored (upper-triangular) row and mirror it below the diagonal */
  PetscCall(MatGetRowUpperTriangular(A));
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
    PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      /* Hermitian: the mirrored entry is the complex conjugate */
      for (i = 0; i < ncols; i++) {
        PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
      }
    } else {
      PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
    }
#else
    PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
#endif
    PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
  }
  PetscCall(MatRestoreRowUpperTriangular(A));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    /* replace A's internals with B's, destroying the old A data */
    PetscCall(MatHeaderReplace(A,&B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
144 
/*
  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol) - inserts or
  adds the scalar `value` at local *point* indices (row,col) of the diagonal
  block A of an MPISBAIJ matrix.  The point indices are split into a block
  index (row/bs, col/bs) and an offset within the bs x bs block (row%bs,
  col%bs); the column position in the row is located by a small binary search
  followed by a linear scan.  If the block is absent and a->nonew permits it,
  the row is reallocated (MatSeqXAIJReallocateAIJ) and later entries are
  shifted up to make room.  orow/ocol are the original global indices, used
  only in the "inserting a new nonzero" error message.

  NOTE: this macro deliberately uses many variables declared in the caller
  (MatSetValues_MPISBAIJ): a, A, aa, ai, aj, aimax, ailen, bs, bs2, rp, ap,
  bap, rmax, nrow, low, high, t, _i, N, brow, bcol, ridx, cidx.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
181 
/*
  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) - identical in
  structure to MatSetValues_SeqSBAIJ_A_Private above, but targets the
  off-diagonal block B (a SeqBAIJ matrix) of an MPISBAIJ matrix; it reads the
  caller's b, B, ba, bi, bj, bimax, bilen variables instead of the A-block
  ones.  orow/ocol are the original global indices, used only in the error
  message.
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
    PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
    PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
218 
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j input by user is ingored or generates an error
221 */
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some Variables required in the macro */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) continue;
285         else PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
286         else {  /* off-diag entry (B) */
287           if (mat->was_assembled) {
288             if (!baij->colmap) {
289               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
290             }
291 #if defined(PETSC_USE_CTABLE)
292             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
293             col  = col - 1;
294 #else
295             col = baij->colmap[in[j]/bs] - 1;
296 #endif
297             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
298               PetscCall(MatDisAssemble_MPISBAIJ(mat));
299               col  =  in[j];
300               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
301               B    = baij->B;
302               b    = (Mat_SeqBAIJ*)(B)->data;
303               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
304               ba   = b->a;
305             } else col += in[j]%bs;
306           } else col = in[j];
307           if (roworiented) value = v[i*n+j];
308           else             value = v[i+j*m];
309           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
310           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
311         }
312       }
313     } else {  /* off processor entry */
314       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
315       if (!baij->donotstash) {
316         mat->assembled = PETSC_FALSE;
317         n_loc          = 0;
318         for (j=0; j<n; j++) {
319           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
320           in_loc[n_loc] = in[j];
321           if (roworiented) {
322             v_loc[n_loc] = v[i*n+j];
323           } else {
324             v_loc[n_loc] = v[j*m+i];
325           }
326           n_loc++;
327         }
328         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
329       }
330     }
331   }
332   PetscFunctionReturn(0);
333 }
334 
/*
  MatSetValuesBlocked_SeqSBAIJ_Inlined - inserts or adds one bs x bs block of
  values v at *block* indices (row,col) of a SeqSBAIJ matrix.  Lower-triangular
  blocks (col < row) are skipped or rejected depending on a->ignore_ltriangular.
  The block column is found by binary search plus linear scan; if absent and
  a->nonew permits, the row is reallocated and later blocks shifted up.
  v is read row-oriented or column-oriented according to a->roworiented
  (the stored blocks are column-major, hence the strided jj loop).
  orow/ocol are the original global block indices, used only in error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  /* only the upper block triangle is stored */
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];        /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];    /* values of this block row */
  rmax = imax[row];           /* allocated block slots in this row */
  nrow = ailen[row];          /* occupied block slots in this row */
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search narrows to <= 7 candidates, then linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add/insert into it in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* v is row-major; stored block is column-major, hence stride bs */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v already column-major: contiguous copy */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks are silently ignored */
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
425 
/*
   This routine is exactly duplicated in mpibaij.c

   MatSetValuesBlocked_SeqBAIJ_Inlined - inserts or adds one bs x bs block of
   values v at block indices (row,col) of a SeqBAIJ matrix (the off-diagonal
   part here, so no triangular filtering).  Same search/insert structure as
   the SBAIJ variant above; orow/ocol are original global block indices used
   only in error messages.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];      /* block-column indices of this block row */
  ap   = aa + bs2*ai[row];  /* values of this block row */
  rmax = imax[row];         /* allocated block slots */
  nrow = ailen[row];        /* occupied block slots */
  low  = 0;
  high = nrow;
  value = v;
  /* binary search narrows to <= 7 candidates, then linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add/insert in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* v is row-major; stored block is column-major, hence stride bs */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* v already column-major: contiguous block copy */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks are silently ignored */
  PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
  PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
516 
517 /*
518     This routine could be optimized by removing the need for the block copy below and passing stride information
519   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
520 */
521 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
522 {
523   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
524   const MatScalar *value;
525   MatScalar       *barray     =baij->barray;
526   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
527   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
528   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
529   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
530 
531   PetscFunctionBegin;
532   if (!barray) {
533     PetscCall(PetscMalloc1(bs2,&barray));
534     baij->barray = barray;
535   }
536 
537   if (roworiented) {
538     stepval = (n-1)*bs;
539   } else {
540     stepval = (m-1)*bs;
541   }
542   for (i=0; i<m; i++) {
543     if (im[i] < 0) continue;
544     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
545     if (im[i] >= rstart && im[i] < rend) {
546       row = im[i] - rstart;
547       for (j=0; j<n; j++) {
548         if (im[i] > in[j]) {
549           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
550           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
551         }
552         /* If NumCol = 1 then a copy is not required */
553         if ((roworiented) && (n == 1)) {
554           barray = (MatScalar*) v + i*bs2;
555         } else if ((!roworiented) && (m == 1)) {
556           barray = (MatScalar*) v + j*bs2;
557         } else { /* Here a copy is required */
558           if (roworiented) {
559             value = v + i*(stepval+bs)*bs + j*bs;
560           } else {
561             value = v + j*(stepval+bs)*bs + i*bs;
562           }
563           for (ii=0; ii<bs; ii++,value+=stepval) {
564             for (jj=0; jj<bs; jj++) {
565               *barray++ = *value++;
566             }
567           }
568           barray -=bs2;
569         }
570 
571         if (in[j] >= cstart && in[j] < cend) {
572           col  = in[j] - cstart;
573           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
574         } else if (in[j] < 0) continue;
575         else PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
576         else {
577           if (mat->was_assembled) {
578             if (!baij->colmap) {
579               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
580             }
581 
582 #if defined(PETSC_USE_DEBUG)
583 #if defined(PETSC_USE_CTABLE)
584             { PetscInt data;
585               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
586               PetscCheck((data - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
587             }
588 #else
589             PetscCheck((baij->colmap[in[j]] - 1) % bs == 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590 #endif
591 #endif
592 #if defined(PETSC_USE_CTABLE)
593             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
594             col  = (col - 1)/bs;
595 #else
596             col = (baij->colmap[in[j]] - 1)/bs;
597 #endif
598             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
599               PetscCall(MatDisAssemble_MPISBAIJ(mat));
600               col  = in[j];
601             }
602           } else col = in[j];
603           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
604         }
605       }
606     } else {
607       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
608       if (!baij->donotstash) {
609         if (roworiented) {
610           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
611         } else {
612           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
613         }
614       }
615     }
616   }
617   PetscFunctionReturn(0);
618 }
619 
/*
  MatGetValues_MPISBAIJ - retrieves entries (idxm[i], idxn[j]) into v (row
  major).  Only rows owned by this process are supported; columns in the
  diagonal range are read from A, others are mapped through colmap/garray and
  read from B (returning 0.0 for columns not present in B's pattern).
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart; /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal range: read from A */
          col  = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
        } else {
          /* off-diagonal column: map global block column to B's local column */
          if (!baij->colmap) {
            PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
          }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* not in B's pattern: the entry is a structural zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
659 
/*
  MatNorm_MPISBAIJ - computes a norm of the symmetric matrix.  Because only
  the upper triangle is stored, off-diagonal contributions are counted twice
  (Frobenius: sum[0] + 2*sum[1]; 1-/inf-norm: each entry added to both its row
  and column sum).  For a symmetric matrix the 1-norm and inf-norm coincide,
  so both are computed as the max column sum.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the sequential A holds the whole matrix */
    PetscCall(MatNorm(baij->A,type,norm));
  } else {
    if (type == NORM_FROBENIUS) {
      PetscCall(PetscMalloc1(2,&lnorm2));
      PetscCall(MatNorm(baij->A,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      PetscCall(MatNorm(baij->B,type,lnorm2));
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B); pointer back to lnorm2[0] */
      PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      /* B entries appear twice in the full symmetric matrix */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      PetscCall(PetscFree(lnorm2));
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
      PetscCall(PetscArrayzero(rsum,mat->cmap->N));
      /* Amat: accumulate |a_ij| into the global column sum; mirror
         off-diagonal blocks into the row sum as well */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      /* Bmat: every entry is off-diagonal, so always count it both ways */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++; /* garray maps B's compressed block column to the global block column */
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        PetscCall(PetscLogFlops(nz*bs*bs));
      }
      /* sum the partial column sums over all ranks, then take the max */
      PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      PetscCall(PetscFree2(rsum,rsum2));
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
733 
734 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
735 {
736   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
737   PetscInt       nstash,reallocs;
738 
739   PetscFunctionBegin;
740   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
741 
742   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
743   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
744   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
745   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
746   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
747   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
748   PetscFunctionReturn(0);
749 }
750 
751 /*
752    MatAssemblyEnd_MPISBAIJ - Completes assembly of a parallel SBAIJ matrix.
753 
754    Drains the point stash and the block stash of off-process entries received
755    from other ranks and inserts them locally, assembles the sequential
756    diagonal (A) and off-diagonal (B) blocks, handles collective disassembly,
757    and sets up the matrix-vector scatters on first final assembly.
758 */
759 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
760 {
761   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
762   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
763   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
764   PetscInt       *row,*col;
765   PetscBool      other_disassembled;
766   PetscMPIInt    n;
767   PetscBool      r1,r2,r3;
768   MatScalar      *val;
769 
770   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
771   PetscFunctionBegin;
772   if (!baij->donotstash &&  !mat->nooffprocentries) {
773     /* drain the point stash: each received message is a list of (row,col,val)
774        triples sorted so entries of the same row are consecutive */
775     while (1) {
776       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
777       if (!flg) break;
778 
779       for (i=0; i<n;) {
780         /* Now identify the consecutive vals belonging to the same row */
781         for (j=i,rstart=row[j]; j<n; j++) {
782           if (row[j] != rstart) break;
783         }
784         if (j < n) ncols = j-i;
785         else       ncols = n-i;
786         /* Now assemble all these values with a single function call */
787         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
788         i    = j;
789       }
790     }
791     PetscCall(MatStashScatterEnd_Private(&mat->stash));
792     /* Now process the block-stash. Since the values are stashed column-oriented,
793        set the roworiented flag to column oriented, and after MatSetValues()
794        restore the original flags */
795     r1 = baij->roworiented;
796     r2 = a->roworiented;
797     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
798 
799     baij->roworiented = PETSC_FALSE;
800     a->roworiented    = PETSC_FALSE;
801 
802     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
803     while (1) {
804       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
805       if (!flg) break;
806 
807       for (i=0; i<n;) {
808         /* Now identify the consecutive vals belonging to the same row */
809         for (j=i,rstart=row[j]; j<n; j++) {
810           if (row[j] != rstart) break;
811         }
812         if (j < n) ncols = j-i;
813         else       ncols = n-i;
814         /* each stashed entry here is a full bs2-sized block, hence val+i*bs2 */
815         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
816         i    = j;
817       }
818     }
819     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
820 
821     baij->roworiented = r1;
822     a->roworiented    = r2;
823 
824     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
825   }
826 
827   PetscCall(MatAssemblyBegin(baij->A,mode));
828   PetscCall(MatAssemblyEnd(baij->A,mode));
829 
830   /* determine if any processor has disassembled, if so we must
831      also disassemble ourselves, in order that we may reassemble. */
832   /*
833      if nonzero structure of submatrix B cannot change then we know that
834      no processor disassembled thus we can skip this stuff
835   */
836   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
837     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
838     if (mat->was_assembled && !other_disassembled) {
839       PetscCall(MatDisAssemble_MPISBAIJ(mat));
840     }
841   }
842 
843   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
844     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
845   }
846   PetscCall(MatAssemblyBegin(baij->B,mode));
847   PetscCall(MatAssemblyEnd(baij->B,mode));
848 
849   /* free MatGetRow() scratch space; it is rebuilt lazily on next use */
850   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
851 
852   baij->rowvalues = NULL;
853 
854   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
855   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
856     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
857     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
858   }
859   PetscFunctionReturn(0);
860 }
849 
850 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
851 #include <petscdraw.h>
852 /*
853    MatView_MPISBAIJ_ASCIIorDraworSocket - Viewer backend for ASCII, draw, and
854    socket viewers.
855 
856    The INFO/INFO_DETAIL ASCII formats print per-rank statistics and return
857    early; all other cases gather the whole matrix onto rank 0 and view the
858    assembled copy there (every rank participates so graphics stay in sync).
859 */
860 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
861 {
862   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
863   PetscInt          bs   = mat->rmap->bs;
864   PetscMPIInt       rank = baij->rank;
865   PetscBool         iascii,isdraw;
866   PetscViewer       sviewer;
867   PetscViewerFormat format;
868 
869   PetscFunctionBegin;
870   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
871   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
872   if (iascii) {
873     PetscCall(PetscViewerGetFormat(viewer,&format));
874     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
875       /* per-rank nonzero/memory statistics, printed synchronized */
876       MatInfo info;
877       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
878       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
879       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
880       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
881       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
882       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
883       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
884       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
885       PetscCall(PetscViewerFlush(viewer));
886       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
887       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
888       PetscCall(VecScatterView(baij->Mvctx,viewer));
889       PetscFunctionReturn(0);
890     } else if (format == PETSC_VIEWER_ASCII_INFO) {
891       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
892       PetscFunctionReturn(0);
893     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
894       PetscFunctionReturn(0);
895     }
896   }
897 
898   if (isdraw) {
899     PetscDraw draw;
900     PetscBool isnull;
901     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
902     PetscCall(PetscDrawIsNull(draw,&isnull));
903     if (isnull) PetscFunctionReturn(0);
904   }
905 
906   {
907     /* assemble the entire matrix onto first processor. */
908     Mat          A;
909     Mat_SeqSBAIJ *Aloc;
910     Mat_SeqBAIJ  *Bloc;
911     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
912     MatScalar    *a;
913     const char   *matname;
914 
915     /* Should this be the same type as mat? */
916     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
917     if (rank == 0) {
918       PetscCall(MatSetSizes(A,M,N,M,N));
919     } else {
920       PetscCall(MatSetSizes(A,0,0,M,N));
921     }
922     PetscCall(MatSetType(A,MATMPISBAIJ));
923     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
924     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
925     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
926 
927     /* copy over the A part */
928     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
929     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
930     /* rvals holds the bs global row indices of block-row i */
931     PetscCall(PetscMalloc1(bs,&rvals));
932 
933     for (i=0; i<mbs; i++) {
934       rvals[0] = bs*(baij->rstartbs + i);
935       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
936       for (j=ai[i]; j<ai[i+1]; j++) {
937         col = (baij->cstartbs+aj[j])*bs;
938         for (k=0; k<bs; k++) {
939           /* insert one bs-long column of the block; 'a' walks the block values */
940           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
941           col++;
942           a += bs;
943         }
944       }
945     }
946     /* copy over the B part */
947     Bloc = (Mat_SeqBAIJ*)baij->B->data;
948     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
949     for (i=0; i<mbs; i++) {
950 
951       rvals[0] = bs*(baij->rstartbs + i);
952       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
953       for (j=ai[i]; j<ai[i+1]; j++) {
954         /* garray maps local off-diagonal block columns to global block columns */
955         col = baij->garray[aj[j]]*bs;
956         for (k=0; k<bs; k++) {
957           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
958           col++;
959           a += bs;
960         }
961       }
962     }
963     PetscCall(PetscFree(rvals));
964     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
965     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
966     /*
967        Everyone has to call to draw the matrix since the graphics waits are
968        synchronized across all processors that share the PetscDraw object
969     */
970     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
971     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
972     if (rank == 0) {
973       /* rank 0 owns the full gathered matrix; view its sequential diagonal block */
974       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
975       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
976     }
977     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
978     PetscCall(PetscViewerFlush(viewer));
979     PetscCall(MatDestroy(&A));
980   }
981   PetscFunctionReturn(0);
982 }
970 
971 /* Used for both MPIBAIJ and MPISBAIJ matrices */
972 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
973 
974 /*
975    MatView_MPISBAIJ - Dispatches viewing of a parallel SBAIJ matrix to the
976    backend matching the viewer's type (ASCII/draw/socket share one backend,
977    binary has its own). Unsupported viewer types are silently ignored.
978 */
979 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
980 {
981   PetscBool ascii,draw,socket,binary;
982 
983   PetscFunctionBegin;
984   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&ascii));
985   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&draw));
986   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&socket));
987   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&binary));
988   if (binary) {
989     PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
990   } else if (ascii || draw || socket) {
991     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
992   }
993   PetscFunctionReturn(0);
994 }
990 
991 /*
992    MatDestroy_MPISBAIJ - Releases all storage owned by a parallel SBAIJ
993    matrix: the sequential blocks, the column map, the scatter contexts and
994    their work vectors, scratch arrays, and the composed methods.
995 */
996 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
997 {
998   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
999 
1000   PetscFunctionBegin;
1001 #if defined(PETSC_USE_LOG)
1002   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
1003 #endif
1004   PetscCall(MatStashDestroy_Private(&mat->stash));
1005   PetscCall(MatStashDestroy_Private(&mat->bstash));
1006   /* sequential diagonal and off-diagonal blocks */
1007   PetscCall(MatDestroy(&baij->A));
1008   PetscCall(MatDestroy(&baij->B));
1009 #if defined(PETSC_USE_CTABLE)
1010   PetscCall(PetscTableDestroy(&baij->colmap));
1011 #else
1012   PetscCall(PetscFree(baij->colmap));
1013 #endif
1014   PetscCall(PetscFree(baij->garray));
1015   /* scatter contexts and their associated work vectors */
1016   PetscCall(VecDestroy(&baij->lvec));
1017   PetscCall(VecScatterDestroy(&baij->Mvctx));
1018   PetscCall(VecDestroy(&baij->slvec0));
1019   PetscCall(VecDestroy(&baij->slvec0b));
1020   PetscCall(VecDestroy(&baij->slvec1));
1021   PetscCall(VecDestroy(&baij->slvec1a));
1022   PetscCall(VecDestroy(&baij->slvec1b));
1023   PetscCall(VecScatterDestroy(&baij->sMvctx));
1024   /* MatGetRow() scratch space */
1025   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1026   PetscCall(PetscFree(baij->barray));
1027   PetscCall(PetscFree(baij->hd));
1028   PetscCall(VecDestroy(&baij->diag));
1029   PetscCall(VecDestroy(&baij->bb1));
1030   PetscCall(VecDestroy(&baij->xx1));
1031 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1032   PetscCall(PetscFree(baij->setvaluescopy));
1033 #endif
1034   PetscCall(PetscFree(baij->in_loc));
1035   PetscCall(PetscFree(baij->v_loc));
1036   PetscCall(PetscFree(baij->rangebs));
1037   PetscCall(PetscFree(mat->data));
1038 
1039   /* remove the type name and all composed methods so the Mat can be retyped */
1040   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1041   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1042   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1043   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1044   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1045 #if defined(PETSC_HAVE_ELEMENTAL)
1046   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1047 #endif
1048 #if defined(PETSC_HAVE_SCALAPACK)
1049   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1050 #endif
1051   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1052   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1053   PetscFunctionReturn(0);
1054 }
1046 
1047 /*
1048    MatMult_MPISBAIJ_Hermitian - yy = mat*xx when only the upper triangle is
1049    stored and the matrix is Hermitian: the subdiagonal contribution uses the
1050    conjugate transpose of B instead of the plain transpose.
1051 */
1052 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1053 {
1054   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1055   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1056   PetscScalar       *from;
1057   const PetscScalar *x;
1058 
1059   PetscFunctionBegin;
1060   /* diagonal part */
1061   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1062   PetscCall(VecSet(a->slvec1b,0.0));
1063 
1064   /* subdiagonal part */
1065   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1066   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1067 
1068   /* copy x into the vec slvec0 */
1069   PetscCall(VecGetArray(a->slvec0,&from));
1070   PetscCall(VecGetArrayRead(xx,&x));
1071 
1072   PetscCall(PetscArraycpy(from,x,bs*mbs));
1073   PetscCall(VecRestoreArray(a->slvec0,&from));
1074   PetscCall(VecRestoreArrayRead(xx,&x));
1075 
1076   /* combine local x with the subdiagonal contributions from other ranks */
1077   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1078   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1079   /* supperdiagonal part */
1080   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1081   PetscFunctionReturn(0);
1082 }
1077 
1078 /*
1079    MatMult_MPISBAIJ - yy = A*xx for a distributed symmetric matrix that stores
1080    only its upper triangle.
1081 
1082    The local diagonal block is applied directly; the stored off-diagonal block
1083    B contributes both its transpose (subdiagonal) and itself (superdiagonal),
1084    combined through the symmetric scatter sMvctx.
1085 */
1086 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1087 {
1088   Mat_MPISBAIJ      *sbaij = (Mat_MPISBAIJ*)A->data;
1089   PetscInt          nloc   = sbaij->mbs*A->rmap->bs;
1090   PetscScalar       *dest;
1091   const PetscScalar *src;
1092 
1093   PetscFunctionBegin;
1094   /* diagonal block: slvec1a <- A_loc * x */
1095   PetscCall((*sbaij->A->ops->mult)(sbaij->A,xx,sbaij->slvec1a));
1096   PetscCall(VecSet(sbaij->slvec1b,0.0));
1097 
1098   /* subdiagonal contribution: slvec0b <- B^T * x */
1099   PetscCall((*sbaij->B->ops->multtranspose)(sbaij->B,xx,sbaij->slvec0b));
1100 
1101   /* place the local part of x at the head of slvec0 */
1102   PetscCall(VecGetArray(sbaij->slvec0,&dest));
1103   PetscCall(VecGetArrayRead(xx,&src));
1104 
1105   PetscCall(PetscArraycpy(dest,src,nloc));
1106   PetscCall(VecRestoreArray(sbaij->slvec0,&dest));
1107   PetscCall(VecRestoreArrayRead(xx,&src));
1108 
1109   /* add remote subdiagonal contributions into slvec1 */
1110   PetscCall(VecScatterBegin(sbaij->sMvctx,sbaij->slvec0,sbaij->slvec1,ADD_VALUES,SCATTER_FORWARD));
1111   PetscCall(VecScatterEnd(sbaij->sMvctx,sbaij->slvec0,sbaij->slvec1,ADD_VALUES,SCATTER_FORWARD));
1112   /* superdiagonal contribution: yy <- B*slvec1b + slvec1a */
1113   PetscCall((*sbaij->B->ops->multadd)(sbaij->B,sbaij->slvec1b,sbaij->slvec1a,yy));
1114   PetscFunctionReturn(0);
1115 }
1107 
1108 /*
1109    MatMultAdd_MPISBAIJ_Hermitian - zz = A*xx + yy for the Hermitian case;
1110    identical to MatMultAdd_MPISBAIJ except the subdiagonal contribution uses
1111    the conjugate transpose of B.
1112 */
1113 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1114 {
1115   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1116   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1117   PetscScalar       *from,zero=0.0;
1118   const PetscScalar *x;
1119 
1120   PetscFunctionBegin;
1121   /* diagonal part */
1122   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1123   PetscCall(VecSet(a->slvec1b,zero));
1124 
1125   /* subdiagonal part */
1126   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1127   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1128 
1129   /* copy x into the vec slvec0 */
1130   PetscCall(VecGetArray(a->slvec0,&from));
1131   PetscCall(VecGetArrayRead(xx,&x));
1132   PetscCall(PetscArraycpy(from,x,bs*mbs));
1133   PetscCall(VecRestoreArray(a->slvec0,&from));
1134 
1135   /* the read access to xx is released between scatter begin and end,
1136      overlapping the restore with the communication */
1137   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1138   PetscCall(VecRestoreArrayRead(xx,&x));
1139   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1140 
1141   /* supperdiagonal part */
1142   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1143   PetscFunctionReturn(0);
1144 }
1138 
1139 /*
1140    MatMultAdd_MPISBAIJ - zz = A*xx + yy for a distributed symmetric matrix
1141    that stores only its upper triangle (same scheme as MatMult_MPISBAIJ with
1142    yy folded into the diagonal-block product).
1143 */
1144 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1145 {
1146   Mat_MPISBAIJ      *sbaij = (Mat_MPISBAIJ*)A->data;
1147   PetscInt          nloc   = sbaij->mbs*A->rmap->bs;
1148   PetscScalar       *dest;
1149   const PetscScalar *src;
1150 
1151   PetscFunctionBegin;
1152   /* diagonal block: slvec1a <- A_loc*x + y */
1153   PetscCall((*sbaij->A->ops->multadd)(sbaij->A,xx,yy,sbaij->slvec1a));
1154   PetscCall(VecSet(sbaij->slvec1b,0.0));
1155 
1156   /* subdiagonal contribution: slvec0b <- B^T * x */
1157   PetscCall((*sbaij->B->ops->multtranspose)(sbaij->B,xx,sbaij->slvec0b));
1158 
1159   /* place the local part of x at the head of slvec0 */
1160   PetscCall(VecGetArray(sbaij->slvec0,&dest));
1161   PetscCall(VecGetArrayRead(xx,&src));
1162   PetscCall(PetscArraycpy(dest,src,nloc));
1163   PetscCall(VecRestoreArray(sbaij->slvec0,&dest));
1164 
1165   /* release the read access to xx while the scatter is in flight */
1166   PetscCall(VecScatterBegin(sbaij->sMvctx,sbaij->slvec0,sbaij->slvec1,ADD_VALUES,SCATTER_FORWARD));
1167   PetscCall(VecRestoreArrayRead(xx,&src));
1168   PetscCall(VecScatterEnd(sbaij->sMvctx,sbaij->slvec0,sbaij->slvec1,ADD_VALUES,SCATTER_FORWARD));
1169 
1170   /* superdiagonal contribution: zz <- B*slvec1b + slvec1a */
1171   PetscCall((*sbaij->B->ops->multadd)(sbaij->B,sbaij->slvec1b,sbaij->slvec1a,zz));
1172   PetscFunctionReturn(0);
1173 }
1168 
1169 /*
1170    MatGetDiagonal_MPISBAIJ - Extracts the diagonal of the matrix into v.
1171 
1172    This only works correctly for square matrices where the subblock A->A is
1173    the diagonal block, so the whole diagonal lives in the local block.
1174 */
1175 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1176 {
1177   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1178 
1179   PetscFunctionBegin;
1180   /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1181   PetscCall(MatGetDiagonal(sbaij->A,v));
1182   PetscFunctionReturn(0);
1183 }
1182 
1183 /*
1184    MatScale_MPISBAIJ - Scales the matrix by aa, i.e. A <- aa*A, by scaling
1185    both sequential blocks.
1186 */
1187 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1188 {
1189   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1190 
1191   PetscFunctionBegin;
1192   PetscCall(MatScale(sbaij->A,aa));
1193   PetscCall(MatScale(sbaij->B,aa));
1194   PetscFunctionReturn(0);
1195 }
1192 
1193 /*
1194    MatGetRow_MPISBAIJ - Returns one locally-owned global row of the matrix,
1195    merging the diagonal-block (A) and off-diagonal-block (B) entries into a
1196    single list sorted by increasing global column, using lazily allocated
1197    scratch arrays sized for the longest local row.
1198 */
1199 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1200 {
1201   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1202   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1203   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1204   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1205   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1206 
1207   PetscFunctionBegin;
1208   /* only one row may be held at a time; MatRestoreRow() clears the flag */
1209   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1210   mat->getrowactive = PETSC_TRUE;
1211 
1212   if (!mat->rowvalues && (idx || v)) {
1213     /*
1214         allocate enough space to hold information from the longest row.
1215     */
1216     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1217     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1218     PetscInt     max = 1,mbs = mat->mbs,tmp;
1219     for (i=0; i<mbs; i++) {
1220       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1221       if (max < tmp) max = tmp;
1222     }
1223     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1224   }
1225 
1226   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1227   lrow = row - brstart;  /* local row index */
1228 
1229   /* pass NULL for whatever the caller did not request */
1230   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1231   if (!v)   {pvA = NULL; pvB = NULL;}
1232   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1233   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1234   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1235   nztot = nzA + nzB;
1236 
1237   cmap = mat->garray;
1238   if (v  || idx) {
1239     if (nztot) {
1240       /* Sort by increasing column numbers, assuming A and B already sorted */
1241       PetscInt imark = -1;
1242       if (v) {
1243         *v = v_p = mat->rowvalues;
1244         /* imark = number of B entries whose global column precedes the diagonal block */
1245         for (i=0; i<nzB; i++) {
1246           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1247           else break;
1248         }
1249         imark = i;
1250         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1251         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1252       }
1253       if (idx) {
1254         *idx = idx_p = mat->rowindices;
1255         if (imark > -1) {
1256           /* imark already determined above while copying values */
1257           for (i=0; i<imark; i++) {
1258             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1259           }
1260         } else {
1261           for (i=0; i<nzB; i++) {
1262             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1263             else break;
1264           }
1265           imark = i;
1266         }
1267         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1268         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1269       }
1270     } else {
1271       if (idx) *idx = NULL;
1272       if (v)   *v   = NULL;
1273     }
1274   }
1275   *nz  = nztot;
1276   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1277   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1278   PetscFunctionReturn(0);
1279 }
1270 
1271 /*
1272    MatRestoreRow_MPISBAIJ - Releases the row obtained with MatGetRow();
1273    the scratch buffers are reused, so only the active flag is cleared.
1274 */
1275 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1276 {
1277   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
1278 
1279   PetscFunctionBegin;
1280   PetscCheck(sbaij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1281   sbaij->getrowactive = PETSC_FALSE;
1282   PetscFunctionReturn(0);
1283 }
1280 
1281 /*
1282    MatGetRowUpperTriangular_MPISBAIJ - Marks the local diagonal block so
1283    MatGetRow() may be used to access its upper-triangular rows.
1284 */
1285 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1286 {
1287   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1288   Mat_SeqSBAIJ *diag  = (Mat_SeqSBAIJ*)sbaij->A->data;
1289 
1290   PetscFunctionBegin;
1291   diag->getrow_utriangular = PETSC_TRUE;
1292   PetscFunctionReturn(0);
1293 }
1290 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1293   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1294 
1295   PetscFunctionBegin;
1296   aA->getrow_utriangular = PETSC_FALSE;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 /*
1301    MatConjugate_MPISBAIJ - Replaces every entry by its complex conjugate;
1302    a no-op in real builds.
1303 */
1304 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1305 {
1306   PetscFunctionBegin;
1307   if (PetscDefined(USE_COMPLEX)) {
1308     Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data;
1309 
1310     PetscCall(MatConjugate(sbaij->A));
1311     PetscCall(MatConjugate(sbaij->B));
1312   }
1313   PetscFunctionReturn(0);
1314 }
1311 
1312 /*
1313    MatRealPart_MPISBAIJ - Replaces every entry by its real part, applied to
1314    both sequential blocks.
1315 */
1316 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1317 {
1318   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1319 
1320   PetscFunctionBegin;
1321   PetscCall(MatRealPart(sbaij->A));
1322   PetscCall(MatRealPart(sbaij->B));
1323   PetscFunctionReturn(0);
1324 }
1321 
1322 /*
1323    MatImaginaryPart_MPISBAIJ - Replaces every entry by its imaginary part,
1324    applied to both sequential blocks.
1325 */
1326 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1327 {
1328   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1329 
1330   PetscFunctionBegin;
1331   PetscCall(MatImaginaryPart(sbaij->A));
1332   PetscCall(MatImaginaryPart(sbaij->B));
1333   PetscFunctionReturn(0);
1334 }
1331 
1332 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333    Input: isrow       - distributed(parallel),
1334           iscol_local - locally owned (seq)
1335 */
1336 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1337 {
1338   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1339   const PetscInt *ptr1,*ptr2;
1340 
1341   PetscFunctionBegin;
1342   PetscCall(ISGetLocalSize(isrow,&sz1));
1343   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1344   if (sz1 > sz2) {
1345     *flg = PETSC_FALSE;
1346     PetscFunctionReturn(0);
1347   }
1348 
1349   PetscCall(ISGetIndices(isrow,&ptr1));
1350   PetscCall(ISGetIndices(iscol_local,&ptr2));
1351 
1352   PetscCall(PetscMalloc1(sz1,&a1));
1353   PetscCall(PetscMalloc1(sz2,&a2));
1354   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1355   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1356   PetscCall(PetscSortInt(sz1,a1));
1357   PetscCall(PetscSortInt(sz2,a2));
1358 
1359   nmatch=0;
1360   k     = 0;
1361   for (i=0; i<sz1; i++) {
1362     for (j=k; j<sz2; j++) {
1363       if (a1[i] == a2[j]) {
1364         k = j; nmatch++;
1365         break;
1366       }
1367     }
1368   }
1369   PetscCall(ISRestoreIndices(isrow,&ptr1));
1370   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1371   PetscCall(PetscFree(a1));
1372   PetscCall(PetscFree(a2));
1373   if (nmatch < sz1) {
1374     *flg = PETSC_FALSE;
1375   } else {
1376     *flg = PETSC_TRUE;
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 /*
1382    MatCreateSubMatrix_MPISBAIJ - Extracts a parallel submatrix. Because the
1383    storage is symmetric, the column IS must equal the row IS and be sorted;
1384    the gathered column IS is cached on the new matrix for MAT_REUSE_MATRIX.
1385 */
1386 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1387 {
1388   IS       iscol_local;
1389   PetscInt csize;
1390 
1391   PetscFunctionBegin;
1392   PetscCall(ISGetLocalSize(iscol,&csize));
1393   if (call == MAT_REUSE_MATRIX) {
1394     /* retrieve the gathered column IS cached on the previous extraction */
1395     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1396     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1397   } else {
1398     PetscBool rows_in_cols,sorted;
1399 
1400     PetscCall(ISAllGather(iscol,&iscol_local));
1401     PetscCall(ISEqual_private(isrow,iscol_local,&rows_in_cols));
1402     PetscCall(ISSorted(iscol_local,&sorted));
1403     PetscCheck(rows_in_cols && sorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1404   }
1405 
1406   /* now call MatCreateSubMatrix_MPIBAIJ() */
1407   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1408   if (call == MAT_INITIAL_MATRIX) {
1409     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1410     PetscCall(ISDestroy(&iscol_local));
1411   }
1412   PetscFunctionReturn(0);
1413 }
1409 
1410 /*
1411    MatZeroEntries_MPISBAIJ - Zeros the stored values of both sequential
1412    blocks while keeping the nonzero pattern.
1413 */
1414 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1415 {
1416   Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)A->data;
1417 
1418   PetscFunctionBegin;
1419   PetscCall(MatZeroEntries(sbaij->A));
1420   PetscCall(MatZeroEntries(sbaij->B));
1421   PetscFunctionReturn(0);
1422 }
1419 
1420 /*
1421    MatGetInfo_MPISBAIJ - Gathers storage statistics: sums the counters of the
1422    two sequential blocks, then (for global flags) reduces them across the
1423    communicator with MAX or SUM.
1424 */
1425 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1426 {
1427   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1428   Mat            A  = a->A,B = a->B;
1429   PetscLogDouble isend[5],irecv[5];
1430 
1431   PetscFunctionBegin;
1432   info->block_size = (PetscReal)matin->rmap->bs;
1433 
1434   /* accumulate the five counters of A, then add those of B */
1435   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1436 
1437   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1438   isend[3] = info->memory;  isend[4] = info->mallocs;
1439 
1440   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1441 
1442   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1443   isend[3] += info->memory;  isend[4] += info->mallocs;
1444   if (flag == MAT_LOCAL) {
1445     info->nz_used      = isend[0];
1446     info->nz_allocated = isend[1];
1447     info->nz_unneeded  = isend[2];
1448     info->memory       = isend[3];
1449     info->mallocs      = isend[4];
1450   } else if (flag == MAT_GLOBAL_MAX) {
1451     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1452 
1453     info->nz_used      = irecv[0];
1454     info->nz_allocated = irecv[1];
1455     info->nz_unneeded  = irecv[2];
1456     info->memory       = irecv[3];
1457     info->mallocs      = irecv[4];
1458   } else if (flag == MAT_GLOBAL_SUM) {
1459     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1460 
1461     info->nz_used      = irecv[0];
1462     info->nz_allocated = irecv[1];
1463     info->nz_unneeded  = irecv[2];
1464     info->memory       = irecv[3];
1465     info->mallocs      = irecv[4];
1466   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1467   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1468   info->fill_ratio_needed = 0;
1469   info->factor_mallocs    = 0;
1470   PetscFunctionReturn(0);
1471 }
1466 
1467 /*
1468    MatSetOption_MPISBAIJ - Applies an option to the parallel matrix,
1469    forwarding to the sequential blocks where appropriate and swapping the
1470    mat-vec kernels when the Hermitian/symmetric property changes.
1471 */
1472 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1473 {
1474   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1475   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1476 
1477   PetscFunctionBegin;
1478   switch (op) {
1479   /* options forwarded verbatim to both sequential blocks */
1480   case MAT_NEW_NONZERO_LOCATIONS:
1481   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1482   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1483   case MAT_KEEP_NONZERO_PATTERN:
1484   case MAT_SUBMAT_SINGLEIS:
1485   case MAT_NEW_NONZERO_LOCATION_ERR:
1486     MatCheckPreallocated(A,1);
1487     PetscCall(MatSetOption(a->A,op,flg));
1488     PetscCall(MatSetOption(a->B,op,flg));
1489     break;
1490   case MAT_ROW_ORIENTED:
1491     MatCheckPreallocated(A,1);
1492     a->roworiented = flg;
1493 
1494     PetscCall(MatSetOption(a->A,op,flg));
1495     PetscCall(MatSetOption(a->B,op,flg));
1496     break;
1497   case MAT_FORCE_DIAGONAL_ENTRIES:
1498   case MAT_SORTED_FULL:
1499     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1500     break;
1501   case MAT_IGNORE_OFF_PROC_ENTRIES:
1502     a->donotstash = flg;
1503     break;
1504   case MAT_USE_HASH_TABLE:
1505     a->ht_flag = flg;
1506     break;
1507   case MAT_HERMITIAN:
1508     MatCheckPreallocated(A,1);
1509     PetscCall(MatSetOption(a->A,op,flg));
1510 #if defined(PETSC_USE_COMPLEX)
1511     if (flg) { /* need different mat-vec ops */
1512       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1513       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1514       /* y = A^T x is not provided for a Hermitian (non-symmetric) matrix */
1515       A->ops->multtranspose    = NULL;
1516       A->ops->multtransposeadd = NULL;
1517       A->symmetric = PETSC_FALSE;
1518     }
1519 #endif
1520     break;
1521   case MAT_SPD:
1522   case MAT_SYMMETRIC:
1523     MatCheckPreallocated(A,1);
1524     PetscCall(MatSetOption(a->A,op,flg));
1525 #if defined(PETSC_USE_COMPLEX)
1526     if (flg) { /* restore to use default mat-vec ops */
1527       A->ops->mult             = MatMult_MPISBAIJ;
1528       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1529       /* for a symmetric matrix A^T x == A x, so reuse the plain kernels */
1530       A->ops->multtranspose    = MatMult_MPISBAIJ;
1531       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1532     }
1533 #endif
1534     break;
1535   case MAT_STRUCTURALLY_SYMMETRIC:
1536     MatCheckPreallocated(A,1);
1537     PetscCall(MatSetOption(a->A,op,flg));
1538     break;
1539   case MAT_SYMMETRY_ETERNAL:
1540     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1541     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1542     break;
1543   case MAT_IGNORE_LOWER_TRIANGULAR:
1544     aA->ignore_ltriangular = flg;
1545     break;
1546   case MAT_ERROR_LOWER_TRIANGULAR:
1547     /* NOTE(review): this sets ignore_ltriangular, same field as
1548        MAT_IGNORE_LOWER_TRIANGULAR — confirm this is intended and not a
1549        copy/paste of the previous case */
1550     aA->ignore_ltriangular = flg;
1551     break;
1552   case MAT_GETROW_UPPERTRIANGULAR:
1553     aA->getrow_utriangular = flg;
1554     break;
1555   default:
1556     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1557   }
1558   PetscFunctionReturn(0);
1559 }
1549 
1550 /*
1551    MatTranspose_MPISBAIJ - The matrix is stored symmetric, so the transpose
1552    is the matrix itself: duplicate for MAT_INITIAL_MATRIX, copy for
1553    MAT_REUSE_MATRIX.
1554 */
1555 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1556 {
1557   PetscFunctionBegin;
1558   if (reuse == MAT_REUSE_MATRIX) {
1559     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1560   } else if (reuse == MAT_INITIAL_MATRIX) {
1561     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1562   }
1563   PetscFunctionReturn(0);
1564 }
1560 
1561 /*
1562    MatDiagonalScale_MPISBAIJ - Computes mat <- diag(ll)*mat*diag(rr).
1563    Symmetric storage requires ll == rr; the scatter of rr is overlapped with
1564    scaling the diagonal block.
1565 */
1566 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1567 {
1568   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1569   Mat            a     = baij->A, b=baij->B;
1570   PetscInt       nv,m,n;
1571   PetscBool      flg;
1572 
1573   PetscFunctionBegin;
1574   if (ll != rr) {
1575     PetscCall(VecEqual(ll,rr,&flg));
1576     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same");
1577   }
1578   /* ll == rr == NULL means nothing to scale */
1579   if (!ll) PetscFunctionReturn(0);
1580 
1581   PetscCall(MatGetLocalSize(mat,&m,&n));
1582   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same",m,n);
1583 
1584   PetscCall(VecGetLocalSize(rr,&nv));
1585   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1586 
1587   /* start communicating the ghost values of rr needed by the B block */
1588   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1589 
1590   /* left diagonalscale the off-diagonal part */
1591   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1592 
1593   /* scale the diagonal part */
1594   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1595 
1596   /* right diagonalscale the off-diagonal part */
1597   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1598   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1599   PetscFunctionReturn(0);
1600 }
1594 
1595 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1596 {
1597   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1598 
1599   PetscFunctionBegin;
1600   PetscCall(MatSetUnfactored(a->A));
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1605 
1606 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1607 {
1608   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1609   Mat            a,b,c,d;
1610   PetscBool      flg;
1611 
1612   PetscFunctionBegin;
1613   a = matA->A; b = matA->B;
1614   c = matB->A; d = matB->B;
1615 
1616   PetscCall(MatEqual(a,c,&flg));
1617   if (flg) {
1618     PetscCall(MatEqual(b,d,&flg));
1619   }
1620   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1625 {
1626   PetscBool      isbaij;
1627 
1628   PetscFunctionBegin;
1629   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1630   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1631   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1632   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1633     PetscCall(MatGetRowUpperTriangular(A));
1634     PetscCall(MatCopy_Basic(A,B,str));
1635     PetscCall(MatRestoreRowUpperTriangular(A));
1636   } else {
1637     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1638     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1639 
1640     PetscCall(MatCopy(a->A,b->A,str));
1641     PetscCall(MatCopy(a->B,b->B,str));
1642   }
1643   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1644   PetscFunctionReturn(0);
1645 }
1646 
/* Default MatSetUp(): preallocate with default nonzero estimates using the matrix's block size. */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscFunctionBegin;
  PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
  PetscFunctionReturn(0);
}
1653 
/*
   MatAXPY_MPISBAIJ - Computes Y = a*X + Y. Three cases:
     SAME_NONZERO_PATTERN   - add the stored value arrays directly with BLAS axpy;
     SUBSET_NONZERO_PATTERN - use the generic implementation, temporarily making
                              MatGetRow() on X return the full upper triangle;
     otherwise              - build a new matrix preallocated for the union of the
                              two patterns, add into it, and merge it back into Y.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays line up entry for entry, so axpy them.
       First the diagonal (SBAIJ) blocks ... */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    PetscCall(PetscBLASIntCast(xa->nz,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* ... then the off-diagonal (BAIJ) blocks */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    PetscCall(PetscBLASIntCast(xb->nz,&bnz));
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
    PetscCall(MatAXPY_Basic(Y,a,X,str));
    PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
  } else {
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    PetscCall(MatGetRowUpperTriangular(X));
    PetscCall(MatGetRowUpperTriangular(Y));
    /* per-block-row nonzero counts of the merged pattern, split into the
       diagonal-portion counts (nnz_d) and off-diagonal-portion counts (nnz_o) */
    PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
    PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
    PetscCall(MatSetType(B,MATMPISBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
    PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
    /* move B's guts into Y's header and destroy the temporary B */
    PetscCall(MatHeaderMerge(Y,&B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
    PetscCall(MatRestoreRowUpperTriangular(X));
    PetscCall(MatRestoreRowUpperTriangular(Y));
  }
  PetscFunctionReturn(0);
}
1702 
1703 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1704 {
1705   PetscInt       i;
1706   PetscBool      flg;
1707 
1708   PetscFunctionBegin;
1709   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1710   for (i=0; i<n; i++) {
1711     PetscCall(ISEqual(irow[i],icol[i],&flg));
1712     if (!flg) {
1713       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1714     }
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1720 {
1721   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1722   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1723 
1724   PetscFunctionBegin;
1725   if (!Y->preallocated) {
1726     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1727   } else if (!aij->nz) {
1728     PetscInt nonew = aij->nonew;
1729     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1730     aij->nonew = nonew;
1731   }
1732   PetscCall(MatShift_Basic(Y,a));
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1737 {
1738   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1739 
1740   PetscFunctionBegin;
1741   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1742   PetscCall(MatMissingDiagonal(a->A,missing,d));
1743   if (d) {
1744     PetscInt rstart;
1745     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1746     *d += rstart/A->rmap->bs;
1747 
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1753 {
1754   PetscFunctionBegin;
1755   *a = ((Mat_MPISBAIJ*)A->data)->A;
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 /* -------------------------------------------------------------------*/
/*
   Operations table for MATMPISBAIJ. Each entry fills the corresponding slot of
   struct _MatOps (see matimpl.h); the numbered comment markers give the slot
   index and NULL marks an operation this type does not implement. The transpose
   multiply slots reuse MatMult/MatMultAdd because the matrix is symmetric.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       NULL,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ NULL,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       NULL,
                                       NULL,
                               /* 49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 54*/ NULL,
                                       NULL,
                                       MatSetUnfactored_MPISBAIJ,
                                       NULL,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       NULL,
                                       MatConvert_MPISBAIJ_Basic,
                                       NULL,
                                       NULL,
                               /* 74*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /* 99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPISBAIJ,
                                       NULL,
                               /*104*/ NULL,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                               /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
                                       NULL,
                                       NULL,
                                       NULL
};
1909 
1910 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1911 {
1912   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1913   PetscInt       i,mbs,Mbs;
1914   PetscMPIInt    size;
1915 
1916   PetscFunctionBegin;
1917   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1918   PetscCall(PetscLayoutSetUp(B->rmap));
1919   PetscCall(PetscLayoutSetUp(B->cmap));
1920   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1921   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1922   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1923 
1924   mbs = B->rmap->n/bs;
1925   Mbs = B->rmap->N/bs;
1926   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->N,bs);
1927 
1928   B->rmap->bs = bs;
1929   b->bs2      = bs*bs;
1930   b->mbs      = mbs;
1931   b->Mbs      = Mbs;
1932   b->nbs      = B->cmap->n/bs;
1933   b->Nbs      = B->cmap->N/bs;
1934 
1935   for (i=0; i<=b->size; i++) {
1936     b->rangebs[i] = B->rmap->range[i]/bs;
1937   }
1938   b->rstartbs = B->rmap->rstart/bs;
1939   b->rendbs   = B->rmap->rend/bs;
1940 
1941   b->cstartbs = B->cmap->rstart/bs;
1942   b->cendbs   = B->cmap->rend/bs;
1943 
1944 #if defined(PETSC_USE_CTABLE)
1945   PetscCall(PetscTableDestroy(&b->colmap));
1946 #else
1947   PetscCall(PetscFree(b->colmap));
1948 #endif
1949   PetscCall(PetscFree(b->garray));
1950   PetscCall(VecDestroy(&b->lvec));
1951   PetscCall(VecScatterDestroy(&b->Mvctx));
1952   PetscCall(VecDestroy(&b->slvec0));
1953   PetscCall(VecDestroy(&b->slvec0b));
1954   PetscCall(VecDestroy(&b->slvec1));
1955   PetscCall(VecDestroy(&b->slvec1a));
1956   PetscCall(VecDestroy(&b->slvec1b));
1957   PetscCall(VecScatterDestroy(&b->sMvctx));
1958 
1959   /* Because the B will have been resized we simply destroy it and create a new one each time */
1960   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1961   PetscCall(MatDestroy(&b->B));
1962   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1963   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1964   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1965   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1966 
1967   if (!B->preallocated) {
1968     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1969     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1970     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1971     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1972     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1973   }
1974 
1975   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1976   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1977 
1978   B->preallocated  = PETSC_TRUE;
1979   B->was_assembled = PETSC_FALSE;
1980   B->assembled     = PETSC_FALSE;
1981   PetscFunctionReturn(0);
1982 }
1983 
/*
   MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills the matrix from
   the local rows given in block CSR form: ii/jj are block-row pointers and global
   block-column indices, V optionally holds the bs*bs values of each block (NULL
   means insert zeros). Blocks strictly below the block diagonal are ignored, as
   the SBAIJ format stores only the upper triangle.
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
  const PetscInt *JJ    =NULL;
  PetscScalar    *values=NULL;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
  PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
  m      = B->rmap->n/bs;       /* local number of block rows */
  rstart = B->rmap->rstart/bs;  /* first owned global block row */
  cend   = B->cmap->rend/bs;    /* one past the last owned global block column */

  PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
  PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];       /* number of blocks in this block row */
    PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ     = jj + ii[i];
    bd     = 0;                 /* bd: blocks strictly below the diagonal (will be dropped) */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d  = 0;                     /* d: blocks in the diagonal portion (columns < cend) */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;     /* remainder goes to the off-diagonal portion */
    nz       = nz - bd;
    nz_max = PetscMax(nz_max,nz); /* widest row, used to size the zero-fill buffer below */
  }
  PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
  PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
  PetscCall(PetscFree2(d_nnz,o_nnz));

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zero blocks to establish the nonzero pattern */
    PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all rows inserted are locally owned, so skip the off-process communication in assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
  PetscFunctionReturn(0);
}
2061 
2062 /*MC
2063    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2064    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2065    the matrix is stored.
2066 
2067    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2068    can call MatSetOption(Mat, MAT_HERMITIAN);
2069 
2070    Options Database Keys:
2071 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2072 
2073    Notes:
2074      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2076 
2077    Level: beginner
2078 
2079 .seealso: `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2080 M*/
2081 
/*
   MatCreate_MPISBAIJ - Constructor for the MATMPISBAIJ type: allocates the
   Mat_MPISBAIJ data structure, installs the operations table, initializes all
   cached state to empty, and registers the type-specific methods. Storage is
   allocated later by MatMPISBAIJSetPreallocation()/MatSetUp().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(B,&b));
  B->data = (void*)b;
  /* install the shared MPISBAIJ operations table */
  PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size+2,&b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec    = NULL;
  b->Mvctx   = NULL;
  b->slvec0  = NULL;
  b->slvec0b = NULL;
  b->slvec1  = NULL;
  b->slvec1a = NULL;
  b->slvec1b = NULL;
  b->sMvctx  = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = NULL;
  b->v_loc  = NULL;
  b->n_loc  = 0;

  /* register type-specific methods looked up via PetscTryMethod()/PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));

  /* an SBAIJ matrix is symmetric by construction; with complex scalars it is
     symmetric but NOT Hermitian unless the user requests MAT_HERMITIAN */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
  PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");
  PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 would make the table too small; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
    PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}
2185 
2186 /*MC
2187    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2188 
2189    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2190    and MATMPISBAIJ otherwise.
2191 
2192    Options Database Keys:
2193 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2194 
2195   Level: beginner
2196 
2197 .seealso: `MatCreateSBAIJ`, `MATSEQSBAIJ`, `MATMPISBAIJ`
2198 M*/
2199 
2200 /*@C
2201    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2202    the user should preallocate the matrix storage by setting the parameters
2203    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2204    performance can be increased by more than a factor of 50.
2205 
2206    Collective on Mat
2207 
2208    Input Parameters:
2209 +  B - the matrix
2210 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2211           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2212 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2213            submatrix  (same for all local rows)
2214 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
2216            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2217            for the diagonal entry and set a value even if it is zero.
2218 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2219            submatrix (same for all local rows).
2220 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2221            off-diagonal portion of the local submatrix that is right of the diagonal
2222            (possibly different for each block row) or NULL.
2223 
2224    Options Database Keys:
2225 +   -mat_no_unroll - uses code that does not unroll the loops in the
2226                      block calculations (much slower)
2227 -   -mat_block_size - size of the blocks to use
2228 
2229    Notes:
2230 
2231    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2233 
2234    If the *_nnz parameter is given then the *_nz parameter is ignored
2235 
2236    Storage Information:
2237    For a square global matrix we define each processor's diagonal portion
2238    to be its local rows and the corresponding columns (a square submatrix);
2239    each processor's off-diagonal portion encompasses the remainder of the
2240    local matrix (a rectangular submatrix).
2241 
2242    The user can specify preallocated storage for the diagonal part of
2243    the local submatrix with either d_nz or d_nnz (not both).  Set
2244    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2245    memory allocation.  Likewise, specify preallocated storage for the
2246    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2247 
2248    You can call MatGetInfo() to get information on how effective the preallocation was;
2249    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2250    You can also run with the option -info and look for messages with the string
2251    malloc in them to see if additional memory allocation was needed.
2252 
2253    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2254    the figure below we depict these three local rows and all columns (0-11).
2255 
2256 .vb
2257            0 1 2 3 4 5 6 7 8 9 10 11
2258           --------------------------
2259    row 3  |. . . d d d o o o o  o  o
2260    row 4  |. . . d d d o o o o  o  o
2261    row 5  |. . . d d d o o o o  o  o
2262           --------------------------
2263 .ve
2264 
2265    Thus, any entries in the d locations are stored in the d (diagonal)
2266    submatrix, and any entries in the o locations are stored in the
2267    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2268    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2269 
2270    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2271    plus the diagonal part of the d matrix,
2272    and o_nz should indicate the number of block nonzeros per row in the o matrix
2273 
2274    In general, for PDE problems in which most nonzeros are near the diagonal,
2275    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2276    or you will get TERRIBLE performance; see the users' manual chapter on
2277    matrices.
2278 
2279    Level: intermediate
2280 
2281 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2282 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  /* Validate the matrix object and require bs to be the same on every rank of B's communicator */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation registered as "MatMPISBAIJSetPreallocation_C";
     PetscTryMethod() is a no-op when the matrix type does not provide it */
  PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
  PetscFunctionReturn(0);
}
2292 
2293 /*@C
2294    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2295    (block compressed row).  For good matrix assembly performance
2296    the user should preallocate the matrix storage by setting the parameters
2297    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2298    performance can be increased by more than a factor of 50.
2299 
2300    Collective
2301 
2302    Input Parameters:
2303 +  comm - MPI communicator
2304 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2305           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2306 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2307            This value should be the same as the local size used in creating the
2308            y vector for the matrix-vector product y = Ax.
2309 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2310            This value should be the same as the local size used in creating the
2311            x vector for the matrix-vector product y = Ax.
2312 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2313 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2314 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2315            submatrix  (same for all local rows)
2316 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or NULL.
2319            If you plan to factor the matrix you must leave room for the diagonal entry and
2320            set its value even if it is zero.
2321 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2322            submatrix (same for all local rows).
2323 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2324            off-diagonal portion of the local submatrix (possibly different for
2325            each block row) or NULL.
2326 
2327    Output Parameter:
2328 .  A - the matrix
2329 
2330    Options Database Keys:
2331 +   -mat_no_unroll - uses code that does not unroll the loops in the
2332                      block calculations (much slower)
2333 .   -mat_block_size - size of the blocks to use
2334 -   -mat_mpi - use the parallel matrix data structures even on one processor
2335                (defaults to using SeqBAIJ format on one processor)
2336 
2337    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2338    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2339    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2340 
2341    Notes:
2342    The number of rows and columns must be divisible by blocksize.
2343    This matrix type does not support complex Hermitian operation.
2344 
2345    The user MUST specify either the local or global matrix dimensions
2346    (possibly both).
2347 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2350 
2351    If the *_nnz parameter is given then the *_nz parameter is ignored
2352 
2353    Storage Information:
2354    For a square global matrix we define each processor's diagonal portion
2355    to be its local rows and the corresponding columns (a square submatrix);
2356    each processor's off-diagonal portion encompasses the remainder of the
2357    local matrix (a rectangular submatrix).
2358 
2359    The user can specify preallocated storage for the diagonal part of
2360    the local submatrix with either d_nz or d_nnz (not both).  Set
2361    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2362    memory allocation.  Likewise, specify preallocated storage for the
2363    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2364 
2365    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2366    the figure below we depict these three local rows and all columns (0-11).
2367 
2368 .vb
2369            0 1 2 3 4 5 6 7 8 9 10 11
2370           --------------------------
2371    row 3  |. . . d d d o o o o  o  o
2372    row 4  |. . . d d d o o o o  o  o
2373    row 5  |. . . d d d o o o o  o  o
2374           --------------------------
2375 .ve
2376 
2377    Thus, any entries in the d locations are stored in the d (diagonal)
2378    submatrix, and any entries in the o locations are stored in the
2379    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2381 
2382    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2383    plus the diagonal part of the d matrix,
2384    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2385    In general, for PDE problems in which most nonzeros are near the diagonal,
2386    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2387    or you will get TERRIBLE performance; see the users' manual chapter on
2388    matrices.
2389 
2390    Level: intermediate
2391 
2392 .seealso: `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2393 @*/
2394 
2395 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2396 {
2397   PetscMPIInt    size;
2398 
2399   PetscFunctionBegin;
2400   PetscCall(MatCreate(comm,A));
2401   PetscCall(MatSetSizes(*A,m,n,M,N));
2402   PetscCallMPI(MPI_Comm_size(comm,&size));
2403   if (size > 1) {
2404     PetscCall(MatSetType(*A,MATMPISBAIJ));
2405     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2406   } else {
2407     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2408     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2409   }
2410   PetscFunctionReturn(0);
2411 }
2412 
/* Duplicate an MPISBAIJ matrix: replicates layout, block-size bookkeeping, column map,
   garray, scatter contexts and work vectors, then duplicates the A (diagonal, SeqSBAIJ)
   and B (off-diagonal, SeqBAIJ) pieces according to cpvalues. */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = NULL;

  /* Create the shell with identical sizes/type and share the layouts by reference */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
  PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
  PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy scalar bookkeeping (block counts, rank info, ownership ranges in block units) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;   /* per-call MatGetRow() scratch; recreated lazily */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;   /* hash table itself is not copied, only its configuration */
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
  /* Copy the global-to-local block column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
#endif
  } else a->colmap = NULL;

  /* Copy garray (local-to-global block column indices of B), if B has any block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len,&a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
  } else a->garray = NULL;

  /* Fresh stash; duplicate the communication vectors and scatter context */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));

  PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
  PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));

  /* slvec1a/slvec1b (and slvec0b) are views into the first bs*mbs entries and the
     remainder of slvec1 (slvec0) respectively; they alias, not copy, the storage */
  PetscCall(VecGetLocalSize(a->slvec1,&nt));
  PetscCall(VecGetArray(a->slvec1,&array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
  PetscCall(VecRestoreArray(a->slvec1,&array));
  PetscCall(VecGetArray(a->slvec0,&array));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
  PetscCall(VecRestoreArray(a->slvec0,&array));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
  a->sMvctx = oldmat->sMvctx;   /* shared (reference-counted), not a deep copy */
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));

  /* Duplicate the two sequential pieces; cpvalues controls whether values are copied */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}
2518 
2519 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2520 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2521 
2522 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2523 {
2524   PetscBool      isbinary;
2525 
2526   PetscFunctionBegin;
2527   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2528   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2529   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 /*XXXXX@
2534    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2535 
2536    Input Parameters:
2537 .  mat  - the matrix
2538 .  fact - factor
2539 
2540    Not Collective on Mat, each process can have a different hash factor
2541 
2542    Level: advanced
2543 
2544   Notes:
2545    This can also be set by the command line option: -mat_use_hash_table fact
2546 
2547 .seealso: `MatSetOption()`
2548 @XXXXX*/
2549 
/* Compute v[i] = max_j |A_ij| for an MPISBAIJ matrix.  Because only the upper triangle
   is stored, entries of B on this rank also contribute to rows owned by higher ranks;
   those contributions are accumulated in `work` and forwarded with point-to-point sends.
   The idx output (column of the max) is not implemented. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* Start with the row maxima of the diagonal (SeqSBAIJ) block */
  PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
  PetscCall(VecGetArray(v,&va));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* raw CSR-of-blocks data of the off-diagonal part B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  PetscCall(PetscCalloc1(bs*Mbs,&work));

  /* row_max for B */
  if (rank != size-1) {   /* the last rank owns no upper-triangular off-diagonal entries */
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): assumes B's block columns map to global columns starting at
             rowners_bs[rank+1] (i.e. contiguously after this rank's diagonal block);
             verify against how garray is used elsewhere for this format */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            /* |B_ij| contributes to the local row max ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, to the (remote) row `col` accumulated in work[] */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
    }
  }

  /* receive values */
  if (rank) {
    rvalues = work;   /* reuse work[] as receive buffer; each message has the same length */
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    /* one message is expected from every lower rank; order does not matter (max is commutative) */
    for (source=0; source<rank; source++) {
      PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  PetscCall(VecRestoreArray(v,&va));
  PetscCall(PetscFree(work));
  PetscFunctionReturn(0);
}
2630 
/* SOR/SSOR relaxation for MPISBAIJ.  Only block size 1 is supported.  Because only the
   upper triangle is stored, the "lower triangular" contribution of the off-diagonal
   part is obtained as B^T*x via the slvec0/slvec1 scatter machinery. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  /* Pure local triangular application: delegate directly to the diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    /* First sweep can use the zero initial guess; subsequent sweeps must not */
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
      its--;
    }

    PetscCall(VecDuplicate(bb,&bb1));
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));

      /* copy xx into slvec0a */
      PetscCall(VecGetArray(mat->slvec0,&ptr));
      PetscCall(VecGetArray(xx,&x));
      PetscCall(PetscArraycpy(ptr,x,bs*mbs));
      PetscCall(VecRestoreArray(mat->slvec0,&ptr));

      /* negate both halves so the scatter ADD below effectively subtracts */
      PetscCall(VecScale(mat->slvec0,-1.0));

      /* copy bb into slvec1a */
      PetscCall(VecGetArray(mat->slvec1,&ptr));
      PetscCall(VecGetArrayRead(bb,&b));
      PetscCall(PetscArraycpy(ptr,b,bs*mbs));
      PetscCall(VecRestoreArray(mat->slvec1,&ptr));

      /* set slvec1b = 0 */
      PetscCall(VecSet(mat->slvec1b,0.0));

      /* slvec1 <- slvec1 + scattered slvec0 (distributes the -B^T*xx contributions) */
      PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
      PetscCall(VecRestoreArray(xx,&x));
      PetscCall(VecRestoreArrayRead(bb,&b));
      PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));

      /* upper triangular part: bb1 = bb1 - B*x */
      PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));

      /* local diagonal sweep */
      PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
    }
    PetscCall(VecDestroy(&bb1));
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors cached on the matrix */
    if (!mat->xx1) {
      PetscCall(VecDuplicate(bb,&mat->xx1));
      PetscCall(VecDuplicate(bb,&mat->bb1));
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
      PetscCall(MatGetDiagonal(matin,mat->diag));
    }
    PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));

    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
      PetscCall(VecAYPX(mat->slvec1a,scale,bb));
      */
      PetscCall(VecGetArray(mat->slvec1a,&sl));
      PetscCall(VecGetArrayRead(mat->diag,&diag));
      PetscCall(VecGetArrayRead(bb,&b));
      PetscCall(VecGetArray(xx,&x));
      PetscCall(VecGetLocalSize(xx,&n));
      if (omega == 1.0) {
        /* scale == -1 here, so b + scale*diag*x simplifies to b - diag*x */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        PetscCall(PetscLogFlops(2.0*n));
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        PetscCall(PetscLogFlops(3.0*n));
      }
      PetscCall(VecRestoreArray(mat->slvec1a,&sl));
      PetscCall(VecRestoreArrayRead(mat->diag,&diag));
      PetscCall(VecRestoreArrayRead(bb,&b));
      PetscCall(VecRestoreArray(xx,&x));
    }

    /* multiply off-diagonal portion of matrix */
    PetscCall(VecSet(mat->slvec1b,0.0));
    PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
    PetscCall(VecGetArray(mat->slvec0,&from));
    PetscCall(VecGetArray(xx,&x));
    PetscCall(PetscArraycpy(from,x,bs*mbs));
    PetscCall(VecRestoreArray(mat->slvec0,&from));
    PetscCall(VecRestoreArray(xx,&x));
    PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
    PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
    PetscCall(VecAXPY(xx,1.0,xx1));
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2761 
2762 /*@
2763      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2764          CSR format the local rows.
2765 
2766    Collective
2767 
2768    Input Parameters:
2769 +  comm - MPI communicator
2770 .  bs - the block size, only a block size of 1 is supported
2771 .  m - number of local rows (Cannot be PETSC_DECIDE)
2772 .  n - This value should be the same as the local size used in creating the
2773        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2774        calculated if N is given) For square matrices n is almost always m.
2775 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2776 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2778 .   j - column indices
2779 -   a - matrix values
2780 
2781    Output Parameter:
2782 .   mat - the matrix
2783 
2784    Level: intermediate
2785 
2786    Notes:
2787        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2788      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2789      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2790 
2791        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2792 
2793 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2794           `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2795 @*/
2796 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2797 {
2798   PetscFunctionBegin;
2799   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2800   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2801   PetscCall(MatCreate(comm,mat));
2802   PetscCall(MatSetSizes(*mat,m,n,M,N));
2803   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2804   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2805   PetscFunctionReturn(0);
2806 }
2807 
2808 /*@C
2809    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2810 
2811    Collective
2812 
2813    Input Parameters:
2814 +  B - the matrix
2815 .  bs - the block size
2816 .  i - the indices into j for the start of each local row (starts with zero)
2817 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2818 -  v - optional values in the matrix
2819 
2820    Level: advanced
2821 
2822    Notes:
2823    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2824    and usually the numerical values as well
2825 
2826    Any entries below the diagonal are ignored
2827 
2828 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2829 @*/
2830 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2831 {
2832   PetscFunctionBegin;
2833   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2838 {
2839   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2840   PetscInt       *indx;
2841   PetscScalar    *values;
2842 
2843   PetscFunctionBegin;
2844   PetscCall(MatGetSize(inmat,&m,&N));
2845   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2846     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2847     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2848     PetscInt       *bindx,rmax=a->rmax,j;
2849     PetscMPIInt    rank,size;
2850 
2851     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2852     mbs = m/bs; Nbs = N/cbs;
2853     if (n == PETSC_DECIDE) {
2854       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2855     }
2856     nbs = n/cbs;
2857 
2858     PetscCall(PetscMalloc1(rmax,&bindx));
2859     MatPreallocateBegin(comm,mbs,nbs,dnz,onz); /* inline function, output __end and __rstart are used below */
2860 
2861     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2862     PetscCallMPI(MPI_Comm_rank(comm,&size));
2863     if (rank == size-1) {
2864       /* Check sum(nbs) = Nbs */
2865       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2866     }
2867 
2868     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2869     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2870     for (i=0; i<mbs; i++) {
2871       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2872       nnz  = nnz/bs;
2873       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2874       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2875       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2876     }
2877     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2878     PetscCall(PetscFree(bindx));
2879 
2880     PetscCall(MatCreate(comm,outmat));
2881     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2882     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2883     PetscCall(MatSetType(*outmat,MATSBAIJ));
2884     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2885     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2886     MatPreallocateEnd(dnz,onz);
2887   }
2888 
2889   /* numeric phase */
2890   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2891   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2892 
2893   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2894   for (i=0; i<m; i++) {
2895     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2896     Ii   = i + rstart;
2897     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2898     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2899   }
2900   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2901   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2902   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2903   PetscFunctionReturn(0);
2904 }
2905