xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 0369a392ee39ea4d01e519e42e389892d4e1e2e4)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21 
22   PetscFunctionBegin;
23   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
24   PetscValidLogicalCollectiveInt(B,nm,2);
25   for (i = 0; i < nm; i++) {
26     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
27     PetscCall(PetscLayoutCompare(B->rmap,X[i]->rmap,&cong));
28     PetscCheck(cong,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
29   }
30   PetscValidLogicalCollectiveBool(B,fill,5);
31   PetscCall(MatGetBlockSize(B,&bs));
32   PetscCall(MatGetSize(B,&M,&N));
33   PetscCall(MatGetLocalSize(B,&m,&n));
34   PetscCall(MatCreate(PetscObjectComm((PetscObject)B),&preallocator));
35   PetscCall(MatSetType(preallocator,MATPREALLOCATOR));
36   PetscCall(MatSetBlockSize(preallocator,bs));
37   PetscCall(MatSetSizes(preallocator,m,n,M,N));
38   PetscCall(MatSetUp(preallocator));
39   PetscCall(MatGetOwnershipRange(preallocator,&rstart,&rend));
40   for (r = rstart; r < rend; ++r) {
41     PetscInt          ncols;
42     const PetscInt    *row;
43     const PetscScalar *vals;
44 
45     for (i = 0; i < nm; i++) {
46       PetscCall(MatGetRow(X[i],r,&ncols,&row,&vals));
47       PetscCall(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES));
48       if (symm && symm[i]) {
49         PetscCall(MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES));
50       }
51       PetscCall(MatRestoreRow(X[i],r,&ncols,&row,&vals));
52     }
53   }
54   PetscCall(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY));
55   PetscCall(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY));
56   PetscCall(MatPreallocatorPreallocate(preallocator,fill,B));
57   PetscCall(MatDestroy(&preallocator));
58   PetscFunctionReturn(0);
59 }
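/*
   A hedged usage sketch (illustrative only) of the helper above: it runs a MATPREALLOCATOR
   pass over the rows of the input matrices X[] (symmetrizing the pattern when symm[i] is
   PETSC_TRUE) and transfers the resulting nonzero pattern to B. A caller in this file would
   use it roughly as follows, with A standing in for an assembled pattern source:

     Mat       X[1]    = {A};
     PetscBool symm[1] = {PETSC_TRUE};

     PetscCall(MatPreallocateWithMats_Private(B,1,X,symm,PETSC_TRUE));
     // B is now preallocated with the symmetrized pattern of A; passing fill=PETSC_TRUE asks
     // MatPreallocatorPreallocate() to also insert the (zero) entries, so later MatSetValues()
     // calls on that pattern need no new mallocs.
*/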
60 
61 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
62 {
63   Mat            B;
64   PetscInt       r;
65 
66   PetscFunctionBegin;
67   if (reuse != MAT_REUSE_MATRIX) {
68     PetscBool symm = PETSC_TRUE,isdense;
69     PetscInt  bs;
70 
71     PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
72     PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
73     PetscCall(MatSetType(B,newtype));
74     PetscCall(MatGetBlockSize(A,&bs));
75     PetscCall(MatSetBlockSize(B,bs));
76     PetscCall(PetscLayoutSetUp(B->rmap));
77     PetscCall(PetscLayoutSetUp(B->cmap));
78     PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,""));
79     if (!isdense) {
80       PetscCall(MatGetRowUpperTriangular(A));
81       PetscCall(MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE));
82       PetscCall(MatRestoreRowUpperTriangular(A));
83     } else {
84       PetscCall(MatSetUp(B));
85     }
86   } else {
87     B    = *newmat;
88     PetscCall(MatZeroEntries(B));
89   }
90 
91   PetscCall(MatGetRowUpperTriangular(A));
92   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
93     PetscInt          ncols;
94     const PetscInt    *row;
95     const PetscScalar *vals;
96 
97     PetscCall(MatGetRow(A,r,&ncols,&row,&vals));
98     PetscCall(MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES));
99 #if defined(PETSC_USE_COMPLEX)
100     if (A->hermitian) {
101       PetscInt i;
102       for (i = 0; i < ncols; i++) {
103         PetscCall(MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES));
104       }
105     } else {
106       PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
107     }
108 #else
109     PetscCall(MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES));
110 #endif
111     PetscCall(MatRestoreRow(A,r,&ncols,&row,&vals));
112   }
113   PetscCall(MatRestoreRowUpperTriangular(A));
114   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
115   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
116 
117   if (reuse == MAT_INPLACE_MATRIX) {
118     PetscCall(MatHeaderReplace(A,&B));
119   } else {
120     *newmat = B;
121   }
122   PetscFunctionReturn(0);
123 }
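/*
   Usage sketch (hedged, illustrative only): MatConvert_MPISBAIJ_Basic() is reached through the
   generic MatConvert() interface, e.g. to expand the symmetric storage into a full matrix:

     Mat A;   // assembled MATMPISBAIJ matrix
     Mat B;

     PetscCall(MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B));
     // ... use B, which now stores both triangles explicitly ...
     PetscCall(MatDestroy(&B));

   With MAT_INPLACE_MATRIX the header of A is replaced instead, as done by MatHeaderReplace()
   above.
*/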
124 
125 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
126 {
127   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
128 
129   PetscFunctionBegin;
130   PetscCall(MatStoreValues(aij->A));
131   PetscCall(MatStoreValues(aij->B));
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138 
139   PetscFunctionBegin;
140   PetscCall(MatRetrieveValues(aij->A));
141   PetscCall(MatRetrieveValues(aij->B));
142   PetscFunctionReturn(0);
143 }
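/*
   Usage sketch (hedged): MatStoreValues()/MatRetrieveValues() let a caller save the numerical
   values of a matrix whose nonzero pattern is frozen, refill the matrix, and restore the saved
   values later; the two routines above supply the MPISBAIJ implementations.

     PetscCall(MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE));
     // ... assemble mat once so the pattern is fixed ...
     PetscCall(MatStoreValues(mat));      // saves the current values
     // ... overwrite values, e.g. while forming a shifted preconditioner ...
     PetscCall(MatRetrieveValues(mat));   // restores the saved values
*/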
144 
145 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
146   { \
147     brow = row/bs;  \
148     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
149     rmax = aimax[brow]; nrow = ailen[brow]; \
150     bcol = col/bs; \
151     ridx = row % bs; cidx = col % bs; \
152     low  = 0; high = nrow; \
153     while (high-low > 3) { \
154       t = (low+high)/2; \
155       if (rp[t] > bcol) high = t; \
156       else              low  = t; \
157     } \
158     for (_i=low; _i<high; _i++) { \
159       if (rp[_i] > bcol) break; \
160       if (rp[_i] == bcol) { \
161         bap = ap + bs2*_i + bs*cidx + ridx; \
162         if (addv == ADD_VALUES) *bap += value;  \
163         else                    *bap  = value;  \
164         goto a_noinsert; \
165       } \
166     } \
167     if (a->nonew == 1) goto a_noinsert; \
168     PetscCheck(a->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
169     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
170     N = nrow++ - 1;  \
171     /* shift up all the later entries in this row */ \
172     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
173     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
174     PetscCall(PetscArrayzero(ap+bs2*_i,bs2));  \
175     rp[_i]                      = bcol;  \
176     ap[bs2*_i + bs*cidx + ridx] = value;  \
177     A->nonzerostate++;\
178 a_noinsert:; \
179     ailen[brow] = nrow; \
180   }
181 
182 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
183   { \
184     brow = row/bs;  \
185     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
186     rmax = bimax[brow]; nrow = bilen[brow]; \
187     bcol = col/bs; \
188     ridx = row % bs; cidx = col % bs; \
189     low  = 0; high = nrow; \
190     while (high-low > 3) { \
191       t = (low+high)/2; \
192       if (rp[t] > bcol) high = t; \
193       else              low  = t; \
194     } \
195     for (_i=low; _i<high; _i++) { \
196       if (rp[_i] > bcol) break; \
197       if (rp[_i] == bcol) { \
198         bap = ap + bs2*_i + bs*cidx + ridx; \
199         if (addv == ADD_VALUES) *bap += value;  \
200         else                    *bap  = value;  \
201         goto b_noinsert; \
202       } \
203     } \
204     if (b->nonew == 1) goto b_noinsert; \
205     PetscCheck(b->nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
206     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
207     N = nrow++ - 1;  \
208     /* shift up all the later entries in this row */ \
209     PetscCall(PetscArraymove(rp+_i+1,rp+_i,N-_i+1)); \
210     PetscCall(PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1))); \
211     PetscCall(PetscArrayzero(ap+bs2*_i,bs2)); \
212     rp[_i]                      = bcol;  \
213     ap[bs2*_i + bs*cidx + ridx] = value;  \
214     B->nonzerostate++;\
215 b_noinsert:; \
216     bilen[brow] = nrow; \
217   }
218 
219 /* Only add/insert a(i,j) with i<=j (blocks).
220    Any a(i,j) with i>j input by the user is ignored or generates an error
221 */
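/*
   Caller-side sketch (hedged, illustrative only): entries with block row > block column are
   either rejected or silently dropped, depending on MAT_IGNORE_LOWER_TRIANGULAR:

     PetscCall(MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
     PetscCall(MatSetValue(mat,i,j,v,ADD_VALUES));   // kept when i <= j (block-wise), dropped otherwise
     PetscCall(MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY));
*/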
222 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
223 {
224   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
225   MatScalar      value;
226   PetscBool      roworiented = baij->roworiented;
227   PetscInt       i,j,row,col;
228   PetscInt       rstart_orig=mat->rmap->rstart;
229   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
230   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
231 
232   /* Some Variables required in the macro */
233   Mat          A     = baij->A;
234   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
235   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
236   MatScalar    *aa   =a->a;
237 
238   Mat         B     = baij->B;
239   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
240   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
241   MatScalar   *ba   =b->a;
242 
243   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
244   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
245   MatScalar *ap,*bap;
246 
247   /* for stash */
248   PetscInt  n_loc, *in_loc = NULL;
249   MatScalar *v_loc = NULL;
250 
251   PetscFunctionBegin;
252   if (!baij->donotstash) {
253     if (n > baij->n_loc) {
254       PetscCall(PetscFree(baij->in_loc));
255       PetscCall(PetscFree(baij->v_loc));
256       PetscCall(PetscMalloc1(n,&baij->in_loc));
257       PetscCall(PetscMalloc1(n,&baij->v_loc));
258 
259       baij->n_loc = n;
260     }
261     in_loc = baij->in_loc;
262     v_loc  = baij->v_loc;
263   }
264 
265   for (i=0; i<m; i++) {
266     if (im[i] < 0) continue;
267     PetscCheck(im[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1);
268     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
269       row = im[i] - rstart_orig;              /* local row index */
270       for (j=0; j<n; j++) {
271         if (im[i]/bs > in[j]/bs) {
272           if (a->ignore_ltriangular) {
273             continue;    /* ignore lower triangular blocks */
274           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
275         }
276         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
277           col  = in[j] - cstart_orig;         /* local col index */
278           brow = row/bs; bcol = col/bs;
279           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
280           if (roworiented) value = v[i*n+j];
281           else             value = v[i+j*m];
282           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
283           /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
284         } else if (in[j] < 0) continue;
285         else {  /* off-diag entry (B) */
286           PetscCheck(in[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1);
287           if (mat->was_assembled) {
288             if (!baij->colmap) {
289               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
290             }
291 #if defined(PETSC_USE_CTABLE)
292             PetscCall(PetscTableFind(baij->colmap,in[j]/bs + 1,&col));
293             col  = col - 1;
294 #else
295             col = baij->colmap[in[j]/bs] - 1;
296 #endif
297             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
298               PetscCall(MatDisAssemble_MPISBAIJ(mat));
299               col  =  in[j];
300               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
301               B    = baij->B;
302               b    = (Mat_SeqBAIJ*)(B)->data;
303               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
304               ba   = b->a;
305             } else col += in[j]%bs;
306           } else col = in[j];
307           if (roworiented) value = v[i*n+j];
308           else             value = v[i+j*m];
309           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
310           /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
311         }
312       }
313     } else {  /* off processor entry */
314       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
315       if (!baij->donotstash) {
316         mat->assembled = PETSC_FALSE;
317         n_loc          = 0;
318         for (j=0; j<n; j++) {
319           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
320           in_loc[n_loc] = in[j];
321           if (roworiented) {
322             v_loc[n_loc] = v[i*n+j];
323           } else {
324             v_loc[n_loc] = v[j*m+i];
325           }
326           n_loc++;
327         }
328         PetscCall(MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE));
329       }
330     }
331   }
332   PetscFunctionReturn(0);
333 }
334 
335 static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
336 {
337   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
338   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
339   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
340   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
341   PetscBool         roworiented=a->roworiented;
342   const PetscScalar *value     = v;
343   MatScalar         *ap,*aa = a->a,*bap;
344 
345   PetscFunctionBegin;
346   if (col < row) {
347     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
348     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
349   }
350   rp   = aj + ai[row];
351   ap   = aa + bs2*ai[row];
352   rmax = imax[row];
353   nrow = ailen[row];
354   value = v;
355   low   = 0;
356   high  = nrow;
357 
358   while (high-low > 7) {
359     t = (low+high)/2;
360     if (rp[t] > col) high = t;
361     else             low  = t;
362   }
363   for (i=low; i<high; i++) {
364     if (rp[i] > col) break;
365     if (rp[i] == col) {
366       bap = ap +  bs2*i;
367       if (roworiented) {
368         if (is == ADD_VALUES) {
369           for (ii=0; ii<bs; ii++) {
370             for (jj=ii; jj<bs2; jj+=bs) {
371               bap[jj] += *value++;
372             }
373           }
374         } else {
375           for (ii=0; ii<bs; ii++) {
376             for (jj=ii; jj<bs2; jj+=bs) {
377               bap[jj] = *value++;
378             }
379           }
380         }
381       } else {
382         if (is == ADD_VALUES) {
383           for (ii=0; ii<bs; ii++) {
384             for (jj=0; jj<bs; jj++) {
385               *bap++ += *value++;
386             }
387           }
388         } else {
389           for (ii=0; ii<bs; ii++) {
390             for (jj=0; jj<bs; jj++) {
391               *bap++  = *value++;
392             }
393           }
394         }
395       }
396       goto noinsert2;
397     }
398   }
399   if (nonew == 1) goto noinsert2;
400   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
401   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
402   N = nrow++ - 1; high++;
403   /* shift up all the later entries in this row */
404   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
405   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
406   rp[i] = col;
407   bap   = ap +  bs2*i;
408   if (roworiented) {
409     for (ii=0; ii<bs; ii++) {
410       for (jj=ii; jj<bs2; jj+=bs) {
411         bap[jj] = *value++;
412       }
413     }
414   } else {
415     for (ii=0; ii<bs; ii++) {
416       for (jj=0; jj<bs; jj++) {
417         *bap++ = *value++;
418       }
419     }
420   }
421   noinsert2:;
422   ailen[row] = nrow;
423   PetscFunctionReturn(0);
424 }
425 
426 /*
427    This routine is exactly duplicated in mpibaij.c
428 */
429 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
430 {
431   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
432   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
433   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
434   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
435   PetscBool         roworiented=a->roworiented;
436   const PetscScalar *value     = v;
437   MatScalar         *ap,*aa = a->a,*bap;
438 
439   PetscFunctionBegin;
440   rp   = aj + ai[row];
441   ap   = aa + bs2*ai[row];
442   rmax = imax[row];
443   nrow = ailen[row];
444   low  = 0;
445   high = nrow;
446   value = v;
447   while (high-low > 7) {
448     t = (low+high)/2;
449     if (rp[t] > col) high = t;
450     else             low  = t;
451   }
452   for (i=low; i<high; i++) {
453     if (rp[i] > col) break;
454     if (rp[i] == col) {
455       bap = ap +  bs2*i;
456       if (roworiented) {
457         if (is == ADD_VALUES) {
458           for (ii=0; ii<bs; ii++) {
459             for (jj=ii; jj<bs2; jj+=bs) {
460               bap[jj] += *value++;
461             }
462           }
463         } else {
464           for (ii=0; ii<bs; ii++) {
465             for (jj=ii; jj<bs2; jj+=bs) {
466               bap[jj] = *value++;
467             }
468           }
469         }
470       } else {
471         if (is == ADD_VALUES) {
472           for (ii=0; ii<bs; ii++,value+=bs) {
473             for (jj=0; jj<bs; jj++) {
474               bap[jj] += value[jj];
475             }
476             bap += bs;
477           }
478         } else {
479           for (ii=0; ii<bs; ii++,value+=bs) {
480             for (jj=0; jj<bs; jj++) {
481               bap[jj]  = value[jj];
482             }
483             bap += bs;
484           }
485         }
486       }
487       goto noinsert2;
488     }
489   }
490   if (nonew == 1) goto noinsert2;
491   PetscCheck(nonew != -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
492   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
493   N = nrow++ - 1; high++;
494   /* shift up all the later entries in this row */
495   PetscCall(PetscArraymove(rp+i+1,rp+i,N-i+1));
496   PetscCall(PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1)));
497   rp[i] = col;
498   bap   = ap +  bs2*i;
499   if (roworiented) {
500     for (ii=0; ii<bs; ii++) {
501       for (jj=ii; jj<bs2; jj+=bs) {
502         bap[jj] = *value++;
503       }
504     }
505   } else {
506     for (ii=0; ii<bs; ii++) {
507       for (jj=0; jj<bs; jj++) {
508         *bap++ = *value++;
509       }
510     }
511   }
512   noinsert2:;
513   ailen[row] = nrow;
514   PetscFunctionReturn(0);
515 }
516 
517 /*
518     This routine could be optimized by removing the need for the block copy below and passing stride information
519   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
520 */
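/*
   Caller-side sketch (hedged): MatSetValuesBlocked() takes block indices; for block size bs the
   value array holds bs*bs entries per block, laid out row-oriented unless MAT_ROW_ORIENTED has
   been turned off. For a single bs = 2 block at block position (ib,jb) with ib <= jb:

     PetscScalar vblock[4] = {a00,a01,a10,a11};   // placeholder values
     PetscCall(MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,INSERT_VALUES));
*/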
521 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
522 {
523   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
524   const MatScalar *value;
525   MatScalar       *barray     =baij->barray;
526   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
527   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
528   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
529   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
530 
531   PetscFunctionBegin;
532   if (!barray) {
533     PetscCall(PetscMalloc1(bs2,&barray));
534     baij->barray = barray;
535   }
536 
537   if (roworiented) {
538     stepval = (n-1)*bs;
539   } else {
540     stepval = (m-1)*bs;
541   }
542   for (i=0; i<m; i++) {
543     if (im[i] < 0) continue;
544     PetscCheck(im[i] < baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1);
545     if (im[i] >= rstart && im[i] < rend) {
546       row = im[i] - rstart;
547       for (j=0; j<n; j++) {
548         if (im[i] > in[j]) {
549           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
550           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
551         }
552         /* If only a single block is being set (n == 1 row-oriented, or m == 1 column-oriented), no copy is required */
553         if ((roworiented) && (n == 1)) {
554           barray = (MatScalar*) v + i*bs2;
555         } else if ((!roworiented) && (m == 1)) {
556           barray = (MatScalar*) v + j*bs2;
557         } else { /* Here a copy is required */
558           if (roworiented) {
559             value = v + i*(stepval+bs)*bs + j*bs;
560           } else {
561             value = v + j*(stepval+bs)*bs + i*bs;
562           }
563           for (ii=0; ii<bs; ii++,value+=stepval) {
564             for (jj=0; jj<bs; jj++) {
565               *barray++ = *value++;
566             }
567           }
568           barray -=bs2;
569         }
570 
571         if (in[j] >= cstart && in[j] < cend) {
572           col  = in[j] - cstart;
573           PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]));
574         } else if (in[j] < 0) continue;
575         else {
576           PetscCheck(in[j] < baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1);
577           if (mat->was_assembled) {
578             if (!baij->colmap) {
579               PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
580             }
581 
582 #if defined(PETSC_USE_DEBUG)
583 #if defined(PETSC_USE_CTABLE)
584             { PetscInt data;
585               PetscCall(PetscTableFind(baij->colmap,in[j]+1,&data));
586               PetscCheckFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
587             }
588 #else
589             PetscCheckFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
590 #endif
591 #endif
592 #if defined(PETSC_USE_CTABLE)
593             PetscCall(PetscTableFind(baij->colmap,in[j]+1,&col));
594             col  = (col - 1)/bs;
595 #else
596             col = (baij->colmap[in[j]] - 1)/bs;
597 #endif
598             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
599               PetscCall(MatDisAssemble_MPISBAIJ(mat));
600               col  = in[j];
601             }
602           } else col = in[j];
603           PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]));
604         }
605       }
606     } else {
607       PetscCheck(!mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
608       if (!baij->donotstash) {
609         if (roworiented) {
610           PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
611         } else {
612           PetscCall(MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i));
613         }
614       }
615     }
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
621 {
622   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
623   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
624   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
625 
626   PetscFunctionBegin;
627   for (i=0; i<m; i++) {
628     if (idxm[i] < 0) continue; /* negative row */
629     PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1);
630     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
631       row = idxm[i] - bsrstart;
632       for (j=0; j<n; j++) {
633         if (idxn[j] < 0) continue; /* negative column */
634         PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1);
635         if (idxn[j] >= bscstart && idxn[j] < bscend) {
636           col  = idxn[j] - bscstart;
637           PetscCall(MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j));
638         } else {
639           if (!baij->colmap) {
640             PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
641           }
642 #if defined(PETSC_USE_CTABLE)
643           PetscCall(PetscTableFind(baij->colmap,idxn[j]/bs+1,&data));
644           data--;
645 #else
646           data = baij->colmap[idxn[j]/bs]-1;
647 #endif
648           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
649           else {
650             col  = data + idxn[j]%bs;
651             PetscCall(MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j));
652           }
653         }
654       }
655     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
656   }
657   PetscFunctionReturn(0);
658 }
659 
660 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
661 {
662   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
663   PetscReal      sum[2],*lnorm2;
664 
665   PetscFunctionBegin;
666   if (baij->size == 1) {
667     PetscCall(MatNorm(baij->A,type,norm));
668   } else {
669     if (type == NORM_FROBENIUS) {
670       PetscCall(PetscMalloc1(2,&lnorm2));
671       PetscCall(MatNorm(baij->A,type,lnorm2));
672       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
673       PetscCall(MatNorm(baij->B,type,lnorm2));
674       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
675       PetscCall(MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
676       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
677       PetscCall(PetscFree(lnorm2));
678     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
679       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
680       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
681       PetscReal    *rsum,*rsum2,vabs;
682       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
683       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
684       MatScalar    *v;
685 
686       PetscCall(PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2));
687       PetscCall(PetscArrayzero(rsum,mat->cmap->N));
688       /* Amat */
689       v = amat->a; jj = amat->j;
690       for (brow=0; brow<mbs; brow++) {
691         grow = bs*(rstart + brow);
692         nz   = amat->i[brow+1] - amat->i[brow];
693         for (bcol=0; bcol<nz; bcol++) {
694           gcol = bs*(rstart + *jj); jj++;
695           for (col=0; col<bs; col++) {
696             for (row=0; row<bs; row++) {
697               vabs            = PetscAbsScalar(*v); v++;
698               rsum[gcol+col] += vabs;
699               /* non-diagonal block */
700               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
701             }
702           }
703         }
704         PetscCall(PetscLogFlops(nz*bs*bs));
705       }
706       /* Bmat */
707       v = bmat->a; jj = bmat->j;
708       for (brow=0; brow<mbs; brow++) {
709         grow = bs*(rstart + brow);
710         nz = bmat->i[brow+1] - bmat->i[brow];
711         for (bcol=0; bcol<nz; bcol++) {
712           gcol = bs*garray[*jj]; jj++;
713           for (col=0; col<bs; col++) {
714             for (row=0; row<bs; row++) {
715               vabs            = PetscAbsScalar(*v); v++;
716               rsum[gcol+col] += vabs;
717               rsum[grow+row] += vabs;
718             }
719           }
720         }
721         PetscCall(PetscLogFlops(nz*bs*bs));
722       }
723       PetscCall(MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat)));
724       *norm = 0.0;
725       for (col=0; col<mat->cmap->N; col++) {
726         if (rsum2[col] > *norm) *norm = rsum2[col];
727       }
728       PetscCall(PetscFree2(rsum,rsum2));
729     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
730   }
731   PetscFunctionReturn(0);
732 }
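/*
   Note on the Frobenius branch above (a hedged reading of the code): with symmetric storage the
   off-diagonal block B is held only once, so each rank contributes ||A_loc||_F^2 + 2*||B_loc||_F^2
   and the global norm is

     ||mat||_F = sqrt( sum_over_ranks( ||A_loc||_F^2 + 2*||B_loc||_F^2 ) )

   which is what the MPIU_Allreduce() of lnorm2[] followed by PetscSqrtReal() computes.
*/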
733 
734 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
735 {
736   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
737   PetscInt       nstash,reallocs;
738 
739   PetscFunctionBegin;
740   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
741 
742   PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
743   PetscCall(MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs));
744   PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
745   PetscCall(PetscInfo(mat,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
746   PetscCall(MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs));
747   PetscCall(PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
752 {
753   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
754   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
755   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
756   PetscInt       *row,*col;
757   PetscBool      other_disassembled;
758   PetscMPIInt    n;
759   PetscBool      r1,r2,r3;
760   MatScalar      *val;
761 
762   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
763   PetscFunctionBegin;
764   if (!baij->donotstash &&  !mat->nooffprocentries) {
765     while (1) {
766       PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
767       if (!flg) break;
768 
769       for (i=0; i<n;) {
770         /* Now identify the consecutive vals belonging to the same row */
771         for (j=i,rstart=row[j]; j<n; j++) {
772           if (row[j] != rstart) break;
773         }
774         if (j < n) ncols = j-i;
775         else       ncols = n-i;
776         /* Now assemble all these values with a single function call */
777         PetscCall(MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
778         i    = j;
779       }
780     }
781     PetscCall(MatStashScatterEnd_Private(&mat->stash));
782     /* Now process the block-stash. Since the values are stashed column-oriented,
783        temporarily clear the roworiented flags, and restore the original flags
784        after the MatSetValuesBlocked() calls below */
785     r1 = baij->roworiented;
786     r2 = a->roworiented;
787     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
788 
789     baij->roworiented = PETSC_FALSE;
790     a->roworiented    = PETSC_FALSE;
791 
792     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
793     while (1) {
794       PetscCall(MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg));
795       if (!flg) break;
796 
797       for (i=0; i<n;) {
798         /* Now identify the consecutive vals belonging to the same row */
799         for (j=i,rstart=row[j]; j<n; j++) {
800           if (row[j] != rstart) break;
801         }
802         if (j < n) ncols = j-i;
803         else       ncols = n-i;
804         PetscCall(MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode));
805         i    = j;
806       }
807     }
808     PetscCall(MatStashScatterEnd_Private(&mat->bstash));
809 
810     baij->roworiented = r1;
811     a->roworiented    = r2;
812 
813     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
814   }
815 
816   PetscCall(MatAssemblyBegin(baij->A,mode));
817   PetscCall(MatAssemblyEnd(baij->A,mode));
818 
819   /* determine if any processor has disassembled, if so we must
820      also disassemble ourselves, so that we may reassemble. */
821   /*
822      if nonzero structure of submatrix B cannot change then we know that
823      no processor disassembled thus we can skip this stuff
824   */
825   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
826     PetscCall(MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat)));
827     if (mat->was_assembled && !other_disassembled) {
828       PetscCall(MatDisAssemble_MPISBAIJ(mat));
829     }
830   }
831 
832   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
833     PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */
834   }
835   PetscCall(MatAssemblyBegin(baij->B,mode));
836   PetscCall(MatAssemblyEnd(baij->B,mode));
837 
838   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
839 
840   baij->rowvalues = NULL;
841 
842   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
843   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
844     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
845     PetscCall(MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat)));
846   }
847   PetscFunctionReturn(0);
848 }
849 
850 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
851 #include <petscdraw.h>
852 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
853 {
854   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
855   PetscInt          bs   = mat->rmap->bs;
856   PetscMPIInt       rank = baij->rank;
857   PetscBool         iascii,isdraw;
858   PetscViewer       sviewer;
859   PetscViewerFormat format;
860 
861   PetscFunctionBegin;
862   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
863   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
864   if (iascii) {
865     PetscCall(PetscViewerGetFormat(viewer,&format));
866     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
867       MatInfo info;
868       PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
869       PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
870       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
871       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory));
872       PetscCall(MatGetInfo(baij->A,MAT_LOCAL,&info));
873       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
874       PetscCall(MatGetInfo(baij->B,MAT_LOCAL,&info));
875       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used));
876       PetscCall(PetscViewerFlush(viewer));
877       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
878       PetscCall(PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n"));
879       PetscCall(VecScatterView(baij->Mvctx,viewer));
880       PetscFunctionReturn(0);
881     } else if (format == PETSC_VIEWER_ASCII_INFO) {
882       PetscCall(PetscViewerASCIIPrintf(viewer,"  block size is %" PetscInt_FMT "\n",bs));
883       PetscFunctionReturn(0);
884     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
885       PetscFunctionReturn(0);
886     }
887   }
888 
889   if (isdraw) {
890     PetscDraw draw;
891     PetscBool isnull;
892     PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
893     PetscCall(PetscDrawIsNull(draw,&isnull));
894     if (isnull) PetscFunctionReturn(0);
895   }
896 
897   {
898     /* assemble the entire matrix onto first processor. */
899     Mat          A;
900     Mat_SeqSBAIJ *Aloc;
901     Mat_SeqBAIJ  *Bloc;
902     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
903     MatScalar    *a;
904     const char   *matname;
905 
906     /* Should this be the same type as mat? */
907     PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
908     if (rank == 0) {
909       PetscCall(MatSetSizes(A,M,N,M,N));
910     } else {
911       PetscCall(MatSetSizes(A,0,0,M,N));
912     }
913     PetscCall(MatSetType(A,MATMPISBAIJ));
914     PetscCall(MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL));
915     PetscCall(MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE));
916     PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));
917 
918     /* copy over the A part */
919     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
920     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
921     PetscCall(PetscMalloc1(bs,&rvals));
922 
923     for (i=0; i<mbs; i++) {
924       rvals[0] = bs*(baij->rstartbs + i);
925       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
926       for (j=ai[i]; j<ai[i+1]; j++) {
927         col = (baij->cstartbs+aj[j])*bs;
928         for (k=0; k<bs; k++) {
929           PetscCall(MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
930           col++;
931           a += bs;
932         }
933       }
934     }
935     /* copy over the B part */
936     Bloc = (Mat_SeqBAIJ*)baij->B->data;
937     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
938     for (i=0; i<mbs; i++) {
939 
940       rvals[0] = bs*(baij->rstartbs + i);
941       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
942       for (j=ai[i]; j<ai[i+1]; j++) {
943         col = baij->garray[aj[j]]*bs;
944         for (k=0; k<bs; k++) {
945           PetscCall(MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES));
946           col++;
947           a += bs;
948         }
949       }
950     }
951     PetscCall(PetscFree(rvals));
952     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
953     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
954     /*
955        Everyone has to participate in this call to draw the matrix since the graphics waits are
956        synchronized across all processors that share the PetscDraw object
957     */
958     PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
959     PetscCall(PetscObjectGetName((PetscObject)mat,&matname));
960     if (rank == 0) {
961       PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname));
962       PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer));
963     }
964     PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
965     PetscCall(PetscViewerFlush(viewer));
966     PetscCall(MatDestroy(&A));
967   }
968   PetscFunctionReturn(0);
969 }
970 
971 /* Used for both MPIBAIJ and MPISBAIJ matrices */
972 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
973 
974 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
975 {
976   PetscBool      iascii,isdraw,issocket,isbinary;
977 
978   PetscFunctionBegin;
979   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
980   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
981   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
982   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
983   if (iascii || isdraw || issocket) {
984     PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer));
985   } else if (isbinary) {
986     PetscCall(MatView_MPISBAIJ_Binary(mat,viewer));
987   }
988   PetscFunctionReturn(0);
989 }
990 
991 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
992 {
993   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
994 
995   PetscFunctionBegin;
996 #if defined(PETSC_USE_LOG)
997   PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
998 #endif
999   PetscCall(MatStashDestroy_Private(&mat->stash));
1000   PetscCall(MatStashDestroy_Private(&mat->bstash));
1001   PetscCall(MatDestroy(&baij->A));
1002   PetscCall(MatDestroy(&baij->B));
1003 #if defined(PETSC_USE_CTABLE)
1004   PetscCall(PetscTableDestroy(&baij->colmap));
1005 #else
1006   PetscCall(PetscFree(baij->colmap));
1007 #endif
1008   PetscCall(PetscFree(baij->garray));
1009   PetscCall(VecDestroy(&baij->lvec));
1010   PetscCall(VecScatterDestroy(&baij->Mvctx));
1011   PetscCall(VecDestroy(&baij->slvec0));
1012   PetscCall(VecDestroy(&baij->slvec0b));
1013   PetscCall(VecDestroy(&baij->slvec1));
1014   PetscCall(VecDestroy(&baij->slvec1a));
1015   PetscCall(VecDestroy(&baij->slvec1b));
1016   PetscCall(VecScatterDestroy(&baij->sMvctx));
1017   PetscCall(PetscFree2(baij->rowvalues,baij->rowindices));
1018   PetscCall(PetscFree(baij->barray));
1019   PetscCall(PetscFree(baij->hd));
1020   PetscCall(VecDestroy(&baij->diag));
1021   PetscCall(VecDestroy(&baij->bb1));
1022   PetscCall(VecDestroy(&baij->xx1));
1023 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1024   PetscCall(PetscFree(baij->setvaluescopy));
1025 #endif
1026   PetscCall(PetscFree(baij->in_loc));
1027   PetscCall(PetscFree(baij->v_loc));
1028   PetscCall(PetscFree(baij->rangebs));
1029   PetscCall(PetscFree(mat->data));
1030 
1031   PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));
1032   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL));
1033   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL));
1034   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL));
1035   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL));
1036 #if defined(PETSC_HAVE_ELEMENTAL)
1037   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL));
1038 #endif
1039 #if defined(PETSC_HAVE_SCALAPACK)
1040   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL));
1041 #endif
1042   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL));
1043   PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL));
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1048 {
1049   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1050   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1051   PetscScalar       *from;
1052   const PetscScalar *x;
1053 
1054   PetscFunctionBegin;
1055   /* diagonal part */
1056   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1057   PetscCall(VecSet(a->slvec1b,0.0));
1058 
1059   /* subdiagonal part */
1060   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1061   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1062 
1063   /* copy x into the vec slvec0 */
1064   PetscCall(VecGetArray(a->slvec0,&from));
1065   PetscCall(VecGetArrayRead(xx,&x));
1066 
1067   PetscCall(PetscArraycpy(from,x,bs*mbs));
1068   PetscCall(VecRestoreArray(a->slvec0,&from));
1069   PetscCall(VecRestoreArrayRead(xx,&x));
1070 
1071   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1072   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1073   /* superdiagonal part */
1074   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1079 {
1080   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1081   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1082   PetscScalar       *from;
1083   const PetscScalar *x;
1084 
1085   PetscFunctionBegin;
1086   /* diagonal part */
1087   PetscCall((*a->A->ops->mult)(a->A,xx,a->slvec1a));
1088   PetscCall(VecSet(a->slvec1b,0.0));
1089 
1090   /* subdiagonal part */
1091   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1092 
1093   /* copy x into the vec slvec0 */
1094   PetscCall(VecGetArray(a->slvec0,&from));
1095   PetscCall(VecGetArrayRead(xx,&x));
1096 
1097   PetscCall(PetscArraycpy(from,x,bs*mbs));
1098   PetscCall(VecRestoreArray(a->slvec0,&from));
1099   PetscCall(VecRestoreArrayRead(xx,&x));
1100 
1101   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1102   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1103   /* superdiagonal part */
1104   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy));
1105   PetscFunctionReturn(0);
1106 }
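/*
   Hedged summary of the multiply above: with only the upper triangle stored, y = mat*x is built
   from three pieces,

     slvec1a = A_d * x_loc                 diagonal block
     slvec0b = B^T * x_loc                 contribution of the stored block transposes
     scatter [x_loc ; slvec0b] -> slvec1   with ADD_VALUES across ranks
     y       = B * slvec1b + slvec1a       superdiagonal part added last

   MatMult_MPISBAIJ_Hermitian() above is the same except that it applies B^H instead of B^T.
*/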
1107 
1108 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1109 {
1110   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1111   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1112   PetscScalar       *from,zero=0.0;
1113   const PetscScalar *x;
1114 
1115   PetscFunctionBegin;
1116   /* diagonal part */
1117   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1118   PetscCall(VecSet(a->slvec1b,zero));
1119 
1120   /* subdiagonal part */
1121   PetscCheck(a->B->ops->multhermitiantranspose,PetscObjectComm((PetscObject)a->B),PETSC_ERR_SUP,"Not for type %s",((PetscObject)a->B)->type_name);
1122   PetscCall((*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b));
1123 
1124   /* copy x into the vec slvec0 */
1125   PetscCall(VecGetArray(a->slvec0,&from));
1126   PetscCall(VecGetArrayRead(xx,&x));
1127   PetscCall(PetscArraycpy(from,x,bs*mbs));
1128   PetscCall(VecRestoreArray(a->slvec0,&from));
1129 
1130   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1131   PetscCall(VecRestoreArrayRead(xx,&x));
1132   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1133 
1134   /* superdiagonal part */
1135   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1140 {
1141   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1142   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1143   PetscScalar       *from,zero=0.0;
1144   const PetscScalar *x;
1145 
1146   PetscFunctionBegin;
1147   /* diagonal part */
1148   PetscCall((*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a));
1149   PetscCall(VecSet(a->slvec1b,zero));
1150 
1151   /* subdiagonal part */
1152   PetscCall((*a->B->ops->multtranspose)(a->B,xx,a->slvec0b));
1153 
1154   /* copy x into the vec slvec0 */
1155   PetscCall(VecGetArray(a->slvec0,&from));
1156   PetscCall(VecGetArrayRead(xx,&x));
1157   PetscCall(PetscArraycpy(from,x,bs*mbs));
1158   PetscCall(VecRestoreArray(a->slvec0,&from));
1159 
1160   PetscCall(VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1161   PetscCall(VecRestoreArrayRead(xx,&x));
1162   PetscCall(VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD));
1163 
1164   /* superdiagonal part */
1165   PetscCall((*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz));
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 /*
1170   This only works correctly for square matrices where the subblock A->A is the
1171    diagonal block
1172 */
1173 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1174 {
1175   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1176 
1177   PetscFunctionBegin;
1178   /* PetscCheckFalse(a->rmap->N != a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1179   PetscCall(MatGetDiagonal(a->A,v));
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1184 {
1185   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1186 
1187   PetscFunctionBegin;
1188   PetscCall(MatScale(a->A,aa));
1189   PetscCall(MatScale(a->B,aa));
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1194 {
1195   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1196   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1197   PetscInt     bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1198   PetscInt     nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1199   PetscInt    *cmap,*idx_p,cstart = mat->rstartbs;
1200 
1201   PetscFunctionBegin;
1202   PetscCheck(!mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1203   mat->getrowactive = PETSC_TRUE;
1204 
1205   if (!mat->rowvalues && (idx || v)) {
1206     /*
1207         allocate enough space to hold information from the longest row.
1208     */
1209     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1210     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1211     PetscInt     max = 1,mbs = mat->mbs,tmp;
1212     for (i=0; i<mbs; i++) {
1213       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1214       if (max < tmp) max = tmp;
1215     }
1216     PetscCall(PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices));
1217   }
1218 
1219   PetscCheck(row >= brstart && row < brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1220   lrow = row - brstart;  /* local row index */
1221 
1222   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1223   if (!v)   {pvA = NULL; pvB = NULL;}
1224   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1225   PetscCall((*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA));
1226   PetscCall((*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB));
1227   nztot = nzA + nzB;
1228 
1229   cmap = mat->garray;
1230   if (v  || idx) {
1231     if (nztot) {
1232       /* Sort by increasing column numbers, assuming A and B already sorted */
1233       PetscInt imark = -1;
1234       if (v) {
1235         *v = v_p = mat->rowvalues;
1236         for (i=0; i<nzB; i++) {
1237           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1238           else break;
1239         }
1240         imark = i;
1241         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1242         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1243       }
1244       if (idx) {
1245         *idx = idx_p = mat->rowindices;
1246         if (imark > -1) {
1247           for (i=0; i<imark; i++) {
1248             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1249           }
1250         } else {
1251           for (i=0; i<nzB; i++) {
1252             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1253             else break;
1254           }
1255           imark = i;
1256         }
1257         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1258         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1259       }
1260     } else {
1261       if (idx) *idx = NULL;
1262       if (v)   *v   = NULL;
1263     }
1264   }
1265   *nz  = nztot;
1266   PetscCall((*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA));
1267   PetscCall((*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB));
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1272 {
1273   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1274 
1275   PetscFunctionBegin;
1276   PetscCheck(baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1277   baij->getrowactive = PETSC_FALSE;
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282 {
1283   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1284   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1285 
1286   PetscFunctionBegin;
1287   aA->getrow_utriangular = PETSC_TRUE;
1288   PetscFunctionReturn(0);
1289 }
1290 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291 {
1292   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1293   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1294 
1295   PetscFunctionBegin;
1296   aA->getrow_utriangular = PETSC_FALSE;
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301 {
1302   PetscFunctionBegin;
1303   if (PetscDefined(USE_COMPLEX)) {
1304     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)mat->data;
1305 
1306     PetscCall(MatConjugate(a->A));
1307     PetscCall(MatConjugate(a->B));
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315 
1316   PetscFunctionBegin;
1317   PetscCall(MatRealPart(a->A));
1318   PetscCall(MatRealPart(a->B));
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1325 
1326   PetscFunctionBegin;
1327   PetscCall(MatImaginaryPart(a->A));
1328   PetscCall(MatImaginaryPart(a->B));
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333    Input: isrow       - distributed(parallel),
1334    Input: isrow       - distributed (parallel),
1335 */
1336 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1337 {
1338   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1339   const PetscInt *ptr1,*ptr2;
1340 
1341   PetscFunctionBegin;
1342   PetscCall(ISGetLocalSize(isrow,&sz1));
1343   PetscCall(ISGetLocalSize(iscol_local,&sz2));
1344   if (sz1 > sz2) {
1345     *flg = PETSC_FALSE;
1346     PetscFunctionReturn(0);
1347   }
1348 
1349   PetscCall(ISGetIndices(isrow,&ptr1));
1350   PetscCall(ISGetIndices(iscol_local,&ptr2));
1351 
1352   PetscCall(PetscMalloc1(sz1,&a1));
1353   PetscCall(PetscMalloc1(sz2,&a2));
1354   PetscCall(PetscArraycpy(a1,ptr1,sz1));
1355   PetscCall(PetscArraycpy(a2,ptr2,sz2));
1356   PetscCall(PetscSortInt(sz1,a1));
1357   PetscCall(PetscSortInt(sz2,a2));
1358 
1359   nmatch=0;
1360   k     = 0;
1361   for (i=0; i<sz1; i++) {
1362     for (j=k; j<sz2; j++) {
1363       if (a1[i] == a2[j]) {
1364         k = j; nmatch++;
1365         break;
1366       }
1367     }
1368   }
1369   PetscCall(ISRestoreIndices(isrow,&ptr1));
1370   PetscCall(ISRestoreIndices(iscol_local,&ptr2));
1371   PetscCall(PetscFree(a1));
1372   PetscCall(PetscFree(a2));
1373   if (nmatch < sz1) {
1374     *flg = PETSC_FALSE;
1375   } else {
1376     *flg = PETSC_TRUE;
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1382 {
1383   IS             iscol_local;
1384   PetscInt       csize;
1385   PetscBool      isequal;
1386 
1387   PetscFunctionBegin;
1388   PetscCall(ISGetLocalSize(iscol,&csize));
1389   if (call == MAT_REUSE_MATRIX) {
1390     PetscCall(PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local));
1391     PetscCheck(iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1392   } else {
1393     PetscBool issorted;
1394 
1395     PetscCall(ISAllGather(iscol,&iscol_local));
1396     PetscCall(ISEqual_private(isrow,iscol_local,&isequal));
1397     PetscCall(ISSorted(iscol_local, &issorted));
1398     PetscCheck(isequal && issorted,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow and be sorted");
1399   }
1400 
1401   /* now call MatCreateSubMatrix_MPIBAIJ() */
1402   PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat));
1403   if (call == MAT_INITIAL_MATRIX) {
1404     PetscCall(PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local));
1405     PetscCall(ISDestroy(&iscol_local));
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1411 {
1412   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1413 
1414   PetscFunctionBegin;
1415   PetscCall(MatZeroEntries(l->A));
1416   PetscCall(MatZeroEntries(l->B));
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1421 {
1422   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1423   Mat            A  = a->A,B = a->B;
1424   PetscLogDouble isend[5],irecv[5];
1425 
1426   PetscFunctionBegin;
1427   info->block_size = (PetscReal)matin->rmap->bs;
1428 
1429   PetscCall(MatGetInfo(A,MAT_LOCAL,info));
1430 
1431   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1432   isend[3] = info->memory;  isend[4] = info->mallocs;
1433 
1434   PetscCall(MatGetInfo(B,MAT_LOCAL,info));
1435 
1436   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1437   isend[3] += info->memory;  isend[4] += info->mallocs;
1438   if (flag == MAT_LOCAL) {
1439     info->nz_used      = isend[0];
1440     info->nz_allocated = isend[1];
1441     info->nz_unneeded  = isend[2];
1442     info->memory       = isend[3];
1443     info->mallocs      = isend[4];
1444   } else if (flag == MAT_GLOBAL_MAX) {
1445     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin)));
1446 
1447     info->nz_used      = irecv[0];
1448     info->nz_allocated = irecv[1];
1449     info->nz_unneeded  = irecv[2];
1450     info->memory       = irecv[3];
1451     info->mallocs      = irecv[4];
1452   } else if (flag == MAT_GLOBAL_SUM) {
1453     PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin)));
1454 
1455     info->nz_used      = irecv[0];
1456     info->nz_allocated = irecv[1];
1457     info->nz_unneeded  = irecv[2];
1458     info->memory       = irecv[3];
1459     info->mallocs      = irecv[4];
1460   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1461   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1462   info->fill_ratio_needed = 0;
1463   info->factor_mallocs    = 0;
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1468 {
1469   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1470   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1471 
1472   PetscFunctionBegin;
1473   switch (op) {
1474   case MAT_NEW_NONZERO_LOCATIONS:
1475   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1476   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1477   case MAT_KEEP_NONZERO_PATTERN:
1478   case MAT_SUBMAT_SINGLEIS:
1479   case MAT_NEW_NONZERO_LOCATION_ERR:
1480     MatCheckPreallocated(A,1);
1481     PetscCall(MatSetOption(a->A,op,flg));
1482     PetscCall(MatSetOption(a->B,op,flg));
1483     break;
1484   case MAT_ROW_ORIENTED:
1485     MatCheckPreallocated(A,1);
1486     a->roworiented = flg;
1487 
1488     PetscCall(MatSetOption(a->A,op,flg));
1489     PetscCall(MatSetOption(a->B,op,flg));
1490     break;
1491   case MAT_FORCE_DIAGONAL_ENTRIES:
1492   case MAT_SORTED_FULL:
1493     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1494     break;
1495   case MAT_IGNORE_OFF_PROC_ENTRIES:
1496     a->donotstash = flg;
1497     break;
1498   case MAT_USE_HASH_TABLE:
1499     a->ht_flag = flg;
1500     break;
1501   case MAT_HERMITIAN:
1502     MatCheckPreallocated(A,1);
1503     PetscCall(MatSetOption(a->A,op,flg));
1504 #if defined(PETSC_USE_COMPLEX)
1505     if (flg) { /* need different mat-vec ops */
1506       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1507       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1508       A->ops->multtranspose    = NULL;
1509       A->ops->multtransposeadd = NULL;
1510       A->symmetric = PETSC_FALSE;
1511     }
1512 #endif
1513     break;
1514   case MAT_SPD:
1515   case MAT_SYMMETRIC:
1516     MatCheckPreallocated(A,1);
1517     PetscCall(MatSetOption(a->A,op,flg));
1518 #if defined(PETSC_USE_COMPLEX)
1519     if (flg) { /* restore to use default mat-vec ops */
1520       A->ops->mult             = MatMult_MPISBAIJ;
1521       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1522       A->ops->multtranspose    = MatMult_MPISBAIJ;
1523       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1524     }
1525 #endif
1526     break;
1527   case MAT_STRUCTURALLY_SYMMETRIC:
1528     MatCheckPreallocated(A,1);
1529     PetscCall(MatSetOption(a->A,op,flg));
1530     break;
1531   case MAT_SYMMETRY_ETERNAL:
1532     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1533     PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
1534     break;
1535   case MAT_IGNORE_LOWER_TRIANGULAR:
1536     aA->ignore_ltriangular = flg;
1537     break;
1538   case MAT_ERROR_LOWER_TRIANGULAR:
1539     aA->ignore_ltriangular = flg;
1540     break;
1541   case MAT_GETROW_UPPERTRIANGULAR:
1542     aA->getrow_utriangular = flg;
1543     break;
1544   default:
1545     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1546   }
1547   PetscFunctionReturn(0);
1548 }
1549 
1550 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1551 {
1552   PetscFunctionBegin;
1553   if (reuse == MAT_INITIAL_MATRIX) {
1554     PetscCall(MatDuplicate(A,MAT_COPY_VALUES,B));
1555   }  else if (reuse == MAT_REUSE_MATRIX) {
1556     PetscCall(MatCopy(A,*B,SAME_NONZERO_PATTERN));
1557   }
1558   PetscFunctionReturn(0);
1559 }
1560 
1561 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1562 {
1563   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1564   Mat            a     = baij->A, b=baij->B;
1565   PetscInt       nv,m,n;
1566   PetscBool      flg;
1567 
1568   PetscFunctionBegin;
1569   if (ll != rr) {
1570     PetscCall(VecEqual(ll,rr,&flg));
1571     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1572   }
1573   if (!ll) PetscFunctionReturn(0);
1574 
1575   PetscCall(MatGetLocalSize(mat,&m,&n));
1576   PetscCheck(m == n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %" PetscInt_FMT " and %" PetscInt_FMT " must be the same",m,n);
1577 
1578   PetscCall(VecGetLocalSize(rr,&nv));
1579   PetscCheck(nv==n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1580 
1581   PetscCall(VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1582 
1583   /* left diagonalscale the off-diagonal part */
1584   PetscCall((*b->ops->diagonalscale)(b,ll,NULL));
1585 
1586   /* scale the diagonal part */
1587   PetscCall((*a->ops->diagonalscale)(a,ll,rr));
1588 
1589   /* right diagonalscale the off-diagonal part */
1590   PetscCall(VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD));
1591   PetscCall((*b->ops->diagonalscale)(b,NULL,baij->lvec));
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1596 {
1597   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1598 
1599   PetscFunctionBegin;
1600   PetscCall(MatSetUnfactored(a->A));
1601   PetscFunctionReturn(0);
1602 }
1603 
1604 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1605 
1606 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1607 {
1608   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1609   Mat            a,b,c,d;
1610   PetscBool      flg;
1611 
1612   PetscFunctionBegin;
1613   a = matA->A; b = matA->B;
1614   c = matB->A; d = matB->B;
1615 
1616   PetscCall(MatEqual(a,c,&flg));
1617   if (flg) {
1618     PetscCall(MatEqual(b,d,&flg));
1619   }
1620   PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1625 {
1626   PetscBool      isbaij;
1627 
1628   PetscFunctionBegin;
1629   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,""));
1630   PetscCheck(isbaij,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1631   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1632   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1633     PetscCall(MatGetRowUpperTriangular(A));
1634     PetscCall(MatCopy_Basic(A,B,str));
1635     PetscCall(MatRestoreRowUpperTriangular(A));
1636   } else {
1637     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1638     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1639 
1640     PetscCall(MatCopy(a->A,b->A,str));
1641     PetscCall(MatCopy(a->B,b->B,str));
1642   }
1643   PetscCall(PetscObjectStateIncrease((PetscObject)B));
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1648 {
1649   PetscFunctionBegin;
1650   PetscCall(MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL));
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1655 {
1656   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1657   PetscBLASInt   bnz,one=1;
1658   Mat_SeqSBAIJ   *xa,*ya;
1659   Mat_SeqBAIJ    *xb,*yb;
1660 
1661   PetscFunctionBegin;
1662   if (str == SAME_NONZERO_PATTERN) {
1663     PetscScalar alpha = a;
1664     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1665     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1666     PetscCall(PetscBLASIntCast(xa->nz,&bnz));
1667     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1668     xb   = (Mat_SeqBAIJ*)xx->B->data;
1669     yb   = (Mat_SeqBAIJ*)yy->B->data;
1670     PetscCall(PetscBLASIntCast(xb->nz,&bnz));
1671     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1672     PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1673   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1674     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
1675     PetscCall(MatAXPY_Basic(Y,a,X,str));
1676     PetscCall(MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
1677   } else {
1678     Mat      B;
1679     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1680     PetscCheck(bs == X->rmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1681     PetscCall(MatGetRowUpperTriangular(X));
1682     PetscCall(MatGetRowUpperTriangular(Y));
1683     PetscCall(PetscMalloc1(yy->A->rmap->N,&nnz_d));
1684     PetscCall(PetscMalloc1(yy->B->rmap->N,&nnz_o));
1685     PetscCall(MatCreate(PetscObjectComm((PetscObject)Y),&B));
1686     PetscCall(PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name));
1687     PetscCall(MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N));
1688     PetscCall(MatSetBlockSizesFromMats(B,Y,Y));
1689     PetscCall(MatSetType(B,MATMPISBAIJ));
1690     PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d));
1691     PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o));
1692     PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o));
1693     PetscCall(MatAXPY_BasicWithPreallocation(B,Y,a,X,str));
1694     PetscCall(MatHeaderMerge(Y,&B));
1695     PetscCall(PetscFree(nnz_d));
1696     PetscCall(PetscFree(nnz_o));
1697     PetscCall(MatRestoreRowUpperTriangular(X));
1698     PetscCall(MatRestoreRowUpperTriangular(Y));
1699   }
1700   PetscFunctionReturn(0);
1701 }
1702 
1703 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1704 {
1705   PetscInt       i;
1706   PetscBool      flg;
1707 
1708   PetscFunctionBegin;
1709   PetscCall(MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B)); /* B[] are sbaij matrices */
1710   for (i=0; i<n; i++) {
1711     PetscCall(ISEqual(irow[i],icol[i],&flg));
1712     if (!flg) {
1713       PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1714     }
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1720 {
1721   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1722   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1723 
1724   PetscFunctionBegin;
1725   if (!Y->preallocated) {
1726     PetscCall(MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL));
1727   } else if (!aij->nz) {
1728     PetscInt nonew = aij->nonew;
1729     PetscCall(MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL));
1730     aij->nonew = nonew;
1731   }
1732   PetscCall(MatShift_Basic(Y,a));
1733   PetscFunctionReturn(0);
1734 }
1735 
1736 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1737 {
1738   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1739 
1740   PetscFunctionBegin;
1741   PetscCheck(A->rmap->n == A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1742   PetscCall(MatMissingDiagonal(a->A,missing,d));
1743   if (d) {
1744     PetscInt rstart;
1745     PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1746     *d += rstart/A->rmap->bs;
1747 
1748   }
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1753 {
1754   PetscFunctionBegin;
1755   *a = ((Mat_MPISBAIJ*)A->data)->A;
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 /* -------------------------------------------------------------------*/
1760 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1761                                        MatGetRow_MPISBAIJ,
1762                                        MatRestoreRow_MPISBAIJ,
1763                                        MatMult_MPISBAIJ,
1764                                /*  4*/ MatMultAdd_MPISBAIJ,
1765                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1766                                        MatMultAdd_MPISBAIJ,
1767                                        NULL,
1768                                        NULL,
1769                                        NULL,
1770                                /* 10*/ NULL,
1771                                        NULL,
1772                                        NULL,
1773                                        MatSOR_MPISBAIJ,
1774                                        MatTranspose_MPISBAIJ,
1775                                /* 15*/ MatGetInfo_MPISBAIJ,
1776                                        MatEqual_MPISBAIJ,
1777                                        MatGetDiagonal_MPISBAIJ,
1778                                        MatDiagonalScale_MPISBAIJ,
1779                                        MatNorm_MPISBAIJ,
1780                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1781                                        MatAssemblyEnd_MPISBAIJ,
1782                                        MatSetOption_MPISBAIJ,
1783                                        MatZeroEntries_MPISBAIJ,
1784                                /* 24*/ NULL,
1785                                        NULL,
1786                                        NULL,
1787                                        NULL,
1788                                        NULL,
1789                                /* 29*/ MatSetUp_MPISBAIJ,
1790                                        NULL,
1791                                        NULL,
1792                                        MatGetDiagonalBlock_MPISBAIJ,
1793                                        NULL,
1794                                /* 34*/ MatDuplicate_MPISBAIJ,
1795                                        NULL,
1796                                        NULL,
1797                                        NULL,
1798                                        NULL,
1799                                /* 39*/ MatAXPY_MPISBAIJ,
1800                                        MatCreateSubMatrices_MPISBAIJ,
1801                                        MatIncreaseOverlap_MPISBAIJ,
1802                                        MatGetValues_MPISBAIJ,
1803                                        MatCopy_MPISBAIJ,
1804                                /* 44*/ NULL,
1805                                        MatScale_MPISBAIJ,
1806                                        MatShift_MPISBAIJ,
1807                                        NULL,
1808                                        NULL,
1809                                /* 49*/ NULL,
1810                                        NULL,
1811                                        NULL,
1812                                        NULL,
1813                                        NULL,
1814                                /* 54*/ NULL,
1815                                        NULL,
1816                                        MatSetUnfactored_MPISBAIJ,
1817                                        NULL,
1818                                        MatSetValuesBlocked_MPISBAIJ,
1819                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1820                                        NULL,
1821                                        NULL,
1822                                        NULL,
1823                                        NULL,
1824                                /* 64*/ NULL,
1825                                        NULL,
1826                                        NULL,
1827                                        NULL,
1828                                        NULL,
1829                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1830                                        NULL,
1831                                        MatConvert_MPISBAIJ_Basic,
1832                                        NULL,
1833                                        NULL,
1834                                /* 74*/ NULL,
1835                                        NULL,
1836                                        NULL,
1837                                        NULL,
1838                                        NULL,
1839                                /* 79*/ NULL,
1840                                        NULL,
1841                                        NULL,
1842                                        NULL,
1843                                        MatLoad_MPISBAIJ,
1844                                /* 84*/ NULL,
1845                                        NULL,
1846                                        NULL,
1847                                        NULL,
1848                                        NULL,
1849                                /* 89*/ NULL,
1850                                        NULL,
1851                                        NULL,
1852                                        NULL,
1853                                        NULL,
1854                                /* 94*/ NULL,
1855                                        NULL,
1856                                        NULL,
1857                                        NULL,
1858                                        NULL,
1859                                /* 99*/ NULL,
1860                                        NULL,
1861                                        NULL,
1862                                        MatConjugate_MPISBAIJ,
1863                                        NULL,
1864                                /*104*/ NULL,
1865                                        MatRealPart_MPISBAIJ,
1866                                        MatImaginaryPart_MPISBAIJ,
1867                                        MatGetRowUpperTriangular_MPISBAIJ,
1868                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1869                                /*109*/ NULL,
1870                                        NULL,
1871                                        NULL,
1872                                        NULL,
1873                                        MatMissingDiagonal_MPISBAIJ,
1874                                /*114*/ NULL,
1875                                        NULL,
1876                                        NULL,
1877                                        NULL,
1878                                        NULL,
1879                                /*119*/ NULL,
1880                                        NULL,
1881                                        NULL,
1882                                        NULL,
1883                                        NULL,
1884                                /*124*/ NULL,
1885                                        NULL,
1886                                        NULL,
1887                                        NULL,
1888                                        NULL,
1889                                /*129*/ NULL,
1890                                        NULL,
1891                                        NULL,
1892                                        NULL,
1893                                        NULL,
1894                                /*134*/ NULL,
1895                                        NULL,
1896                                        NULL,
1897                                        NULL,
1898                                        NULL,
1899                                /*139*/ MatSetBlockSizes_Default,
1900                                        NULL,
1901                                        NULL,
1902                                        NULL,
1903                                        NULL,
1904                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1905                                        NULL,
1906                                        NULL,
1907                                        NULL
1908 };
1909 
1910 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1911 {
1912   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1913   PetscInt       i,mbs,Mbs;
1914   PetscMPIInt    size;
1915 
1916   PetscFunctionBegin;
1917   PetscCall(MatSetBlockSize(B,PetscAbs(bs)));
1918   PetscCall(PetscLayoutSetUp(B->rmap));
1919   PetscCall(PetscLayoutSetUp(B->cmap));
1920   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
1921   PetscCheck(B->rmap->N <= B->cmap->N,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->N,B->cmap->N);
1922   PetscCheck(B->rmap->n <= B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT,B->rmap->n,B->cmap->n);
1923 
1924   mbs = B->rmap->n/bs;
1925   Mbs = B->rmap->N/bs;
1926   PetscCheck(mbs*bs == B->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT,B->rmap->n,bs);
1927 
1928   B->rmap->bs = bs;
1929   b->bs2      = bs*bs;
1930   b->mbs      = mbs;
1931   b->Mbs      = Mbs;
1932   b->nbs      = B->cmap->n/bs;
1933   b->Nbs      = B->cmap->N/bs;
1934 
1935   for (i=0; i<=b->size; i++) {
1936     b->rangebs[i] = B->rmap->range[i]/bs;
1937   }
1938   b->rstartbs = B->rmap->rstart/bs;
1939   b->rendbs   = B->rmap->rend/bs;
1940 
1941   b->cstartbs = B->cmap->rstart/bs;
1942   b->cendbs   = B->cmap->rend/bs;
1943 
1944 #if defined(PETSC_USE_CTABLE)
1945   PetscCall(PetscTableDestroy(&b->colmap));
1946 #else
1947   PetscCall(PetscFree(b->colmap));
1948 #endif
1949   PetscCall(PetscFree(b->garray));
1950   PetscCall(VecDestroy(&b->lvec));
1951   PetscCall(VecScatterDestroy(&b->Mvctx));
1952   PetscCall(VecDestroy(&b->slvec0));
1953   PetscCall(VecDestroy(&b->slvec0b));
1954   PetscCall(VecDestroy(&b->slvec1));
1955   PetscCall(VecDestroy(&b->slvec1a));
1956   PetscCall(VecDestroy(&b->slvec1b));
1957   PetscCall(VecScatterDestroy(&b->sMvctx));
1958 
1959   /* Because b->B will have been resized we simply destroy it and create a new one each time */
1960   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&size));
1961   PetscCall(MatDestroy(&b->B));
1962   PetscCall(MatCreate(PETSC_COMM_SELF,&b->B));
1963   PetscCall(MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0));
1964   PetscCall(MatSetType(b->B,MATSEQBAIJ));
1965   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->B));
1966 
1967   if (!B->preallocated) {
1968     PetscCall(MatCreate(PETSC_COMM_SELF,&b->A));
1969     PetscCall(MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n));
1970     PetscCall(MatSetType(b->A,MATSEQSBAIJ));
1971     PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)b->A));
1972     PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash));
1973   }
1974 
1975   PetscCall(MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz));
1976   PetscCall(MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz));
1977 
1978   B->preallocated  = PETSC_TRUE;
1979   B->was_assembled = PETSC_FALSE;
1980   B->assembled     = PETSC_FALSE;
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1985 {
1986   PetscInt       m,rstart,cend;
1987   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=NULL,*o_nnz=NULL;
1988   const PetscInt *JJ    =NULL;
1989   PetscScalar    *values=NULL;
1990   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
1991   PetscBool      nooffprocentries;
1992 
1993   PetscFunctionBegin;
1994   PetscCheck(bs >= 1,PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %" PetscInt_FMT,bs);
1995   PetscCall(PetscLayoutSetBlockSize(B->rmap,bs));
1996   PetscCall(PetscLayoutSetBlockSize(B->cmap,bs));
1997   PetscCall(PetscLayoutSetUp(B->rmap));
1998   PetscCall(PetscLayoutSetUp(B->cmap));
1999   PetscCall(PetscLayoutGetBlockSize(B->rmap,&bs));
2000   m      = B->rmap->n/bs;
2001   rstart = B->rmap->rstart/bs;
2002   cend   = B->cmap->rend/bs;
2003 
2004   PetscCheck(!ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]);
2005   PetscCall(PetscMalloc2(m,&d_nnz,m,&o_nnz));
2006   for (i=0; i<m; i++) {
2007     nz = ii[i+1] - ii[i];
2008     PetscCheck(nz >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT,i,nz);
2009     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2010     JJ     = jj + ii[i];
2011     bd     = 0;
2012     for (j=0; j<nz; j++) {
2013       if (*JJ >= i + rstart) break;
2014       JJ++;
2015       bd++;
2016     }
2017     d  = 0;
2018     for (; j<nz; j++) {
2019       if (*JJ++ >= cend) break;
2020       d++;
2021     }
2022     d_nnz[i] = d;
2023     o_nnz[i] = nz - d - bd;
2024     nz       = nz - bd;
2025     nz_max = PetscMax(nz_max,nz);
2026   }
2027   PetscCall(MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz));
2028   PetscCall(MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE));
2029   PetscCall(PetscFree2(d_nnz,o_nnz));
2030 
2031   values = (PetscScalar*)V;
2032   if (!values) {
2033     PetscCall(PetscCalloc1(bs*bs*nz_max,&values));
2034   }
2035   for (i=0; i<m; i++) {
2036     PetscInt          row    = i + rstart;
2037     PetscInt          ncols  = ii[i+1] - ii[i];
2038     const PetscInt    *icols = jj + ii[i];
2039     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2040       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2041       PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES));
2042     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2043       PetscInt j;
2044       for (j=0; j<ncols; j++) {
2045         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2046         PetscCall(MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES));
2047       }
2048     }
2049   }
2050 
2051   if (!V) PetscCall(PetscFree(values));
2052   nooffprocentries    = B->nooffprocentries;
2053   B->nooffprocentries = PETSC_TRUE;
2054   PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
2055   PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
2056   B->nooffprocentries = nooffprocentries;
2057 
2058   PetscCall(MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE));
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 /*MC
2063    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2064    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2065    the matrix is stored.
2066 
2067    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2068    can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE).
2069 
2070    Options Database Keys:
2071 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2072 
2073    Notes:
2074      The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2075      diagonal portion of the matrix on each process has to be less than or equal to the number of columns.
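
     A minimal construction sketch (illustrative only; comm, M, bs, d_nz, and o_nz are placeholders for application values):
.vb
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(A,MATMPISBAIJ);
     MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
     /* insert only the upper triangular (block) entries, then assemble */
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve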
2076 
2077    Level: beginner
2078 
2079 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2080 M*/
2081 
2082 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2083 {
2084   Mat_MPISBAIJ   *b;
2085   PetscErrorCode ierr;
2086   PetscBool      flg = PETSC_FALSE;
2087 
2088   PetscFunctionBegin;
2089   PetscCall(PetscNewLog(B,&b));
2090   B->data = (void*)b;
2091   PetscCall(PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps)));
2092 
2093   B->ops->destroy = MatDestroy_MPISBAIJ;
2094   B->ops->view    = MatView_MPISBAIJ;
2095   B->assembled    = PETSC_FALSE;
2096   B->insertmode   = NOT_SET_VALUES;
2097 
2098   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank));
2099   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size));
2100 
2101   /* build local table of row and column ownerships */
2102   PetscCall(PetscMalloc1(b->size+2,&b->rangebs));
2103 
2104   /* build cache for off array entries formed */
2105   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash));
2106 
2107   b->donotstash  = PETSC_FALSE;
2108   b->colmap      = NULL;
2109   b->garray      = NULL;
2110   b->roworiented = PETSC_TRUE;
2111 
2112   /* stuff used in block assembly */
2113   b->barray = NULL;
2114 
2115   /* stuff used for matrix vector multiply */
2116   b->lvec    = NULL;
2117   b->Mvctx   = NULL;
2118   b->slvec0  = NULL;
2119   b->slvec0b = NULL;
2120   b->slvec1  = NULL;
2121   b->slvec1a = NULL;
2122   b->slvec1b = NULL;
2123   b->sMvctx  = NULL;
2124 
2125   /* stuff for MatGetRow() */
2126   b->rowindices   = NULL;
2127   b->rowvalues    = NULL;
2128   b->getrowactive = PETSC_FALSE;
2129 
2130   /* hash table stuff */
2131   b->ht           = NULL;
2132   b->hd           = NULL;
2133   b->ht_size      = 0;
2134   b->ht_flag      = PETSC_FALSE;
2135   b->ht_fact      = 0;
2136   b->ht_total_ct  = 0;
2137   b->ht_insert_ct = 0;
2138 
2139   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2140   b->ijonly = PETSC_FALSE;
2141 
2142   b->in_loc = NULL;
2143   b->v_loc  = NULL;
2144   b->n_loc  = 0;
2145 
2146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ));
2147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ));
2148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ));
2149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2150 #if defined(PETSC_HAVE_ELEMENTAL)
2151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental));
2152 #endif
2153 #if defined(PETSC_HAVE_SCALAPACK)
2154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK));
2155 #endif
2156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic));
2157   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic));
2158 
2159   B->symmetric                  = PETSC_TRUE;
2160   B->structurally_symmetric     = PETSC_TRUE;
2161   B->symmetric_set              = PETSC_TRUE;
2162   B->structurally_symmetric_set = PETSC_TRUE;
2163   B->symmetric_eternal          = PETSC_TRUE;
2164 #if defined(PETSC_USE_COMPLEX)
2165   B->hermitian                  = PETSC_FALSE;
2166   B->hermitian_set              = PETSC_FALSE;
2167 #else
2168   B->hermitian                  = PETSC_TRUE;
2169   B->hermitian_set              = PETSC_TRUE;
2170 #endif
2171 
2172   PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ));
2173   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");PetscCall(ierr);
2174   PetscCall(PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL));
2175   if (flg) {
2176     PetscReal fact = 1.39;
2177     PetscCall(MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE));
2178     PetscCall(PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL));
2179     if (fact <= 1.0) fact = 1.39;
2180     PetscCall(MatMPIBAIJSetHashTableFactor(B,fact));
2181     PetscCall(PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact));
2182   }
2183   ierr = PetscOptionsEnd();PetscCall(ierr);
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 /*MC
2188    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2189 
2190    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2191    and MATMPISBAIJ otherwise.
2192 
2193    Options Database Keys:
2194 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
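
   A usage sketch for selecting this type from the options database (illustrative; error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetFromOptions(A);   /* picks up -mat_type sbaij and dispatches to MATSEQSBAIJ or MATMPISBAIJ */
.ve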
2195 
2196   Level: beginner
2197 
2198 .seealso: MatCreateSBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2199 M*/
2200 
2201 /*@C
2202    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2203    the user should preallocate the matrix storage by setting the parameters
2204    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2205    performance can be increased by more than a factor of 50.
2206 
2207    Collective on Mat
2208 
2209    Input Parameters:
2210 +  B - the matrix
2211 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2212           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2213 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2214            submatrix  (same for all local rows)
2215 .  d_nnz - array containing the number of block nonzeros in the various block rows
2216            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2217            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2218            for the diagonal entry and set a value even if it is zero.
2219 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2220            submatrix (same for all local rows).
2221 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2222            off-diagonal portion of the local submatrix that is right of the diagonal
2223            (possibly different for each block row) or NULL.
2224 
2225    Options Database Keys:
2226 +   -mat_no_unroll - uses code that does not unroll the loops in the
2227                      block calculations (much slower)
2228 -   -mat_block_size - size of the blocks to use
2229 
2230    Notes:
2231 
2232    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2233    than it must be used on all processors that share the object for that argument.
2234 
2235    If the *_nnz parameter is given then the *_nz parameter is ignored
2236 
2237    Storage Information:
2238    For a square global matrix we define each processor's diagonal portion
2239    to be its local rows and the corresponding columns (a square submatrix);
2240    each processor's off-diagonal portion encompasses the remainder of the
2241    local matrix (a rectangular submatrix).
2242 
2243    The user can specify preallocated storage for the diagonal part of
2244    the local submatrix with either d_nz or d_nnz (not both).  Set
2245    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2246    memory allocation.  Likewise, specify preallocated storage for the
2247    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2248 
2249    You can call MatGetInfo() to get information on how effective the preallocation was;
2250    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2251    You can also run with the option -info and look for messages with the string
2252    malloc in them to see if additional memory allocation was needed.
2253 
2254    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2255    the figure below we depict these three local rows and all columns (0-11).
2256 
2257 .vb
2258            0 1 2 3 4 5 6 7 8 9 10 11
2259           --------------------------
2260    row 3  |. . . d d d o o o o  o  o
2261    row 4  |. . . d d d o o o o  o  o
2262    row 5  |. . . d d d o o o o  o  o
2263           --------------------------
2264 .ve
2265 
2266    Thus, any entries in the d locations are stored in the d (diagonal)
2267    submatrix, and any entries in the o locations are stored in the
2268    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2269    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2270 
2271    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2272    plus the diagonal part of the d matrix,
2273    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2274 
2275    In general, for PDE problems in which most nonzeros are near the diagonal,
2276    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2277    or you will get TERRIBLE performance; see the users' manual chapter on
2278    matrices.
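
   A minimal preallocation sketch (illustrative; the counts 3 and 2 are placeholders for values derived from
   the application's sparsity pattern):
.vb
   MatCreate(comm,&B);
   MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(B,bs,3,NULL,2,NULL);  /* 3 block nonzeros per block row in d (upper triangle plus diagonal), 2 in o */
.ve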
2279 
2280    Level: intermediate
2281 
2282 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2283 @*/
2284 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2285 {
2286   PetscFunctionBegin;
2287   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2288   PetscValidType(B,1);
2289   PetscValidLogicalCollectiveInt(B,bs,2);
2290   PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 /*@C
2295    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2296    (block compressed row).  For good matrix assembly performance
2297    the user should preallocate the matrix storage by setting the parameters
2298    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2299    performance can be increased by more than a factor of 50.
2300 
2301    Collective
2302 
2303    Input Parameters:
2304 +  comm - MPI communicator
2305 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2306           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2307 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2308            This value should be the same as the local size used in creating the
2309            y vector for the matrix-vector product y = Ax.
2310 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2311            This value should be the same as the local size used in creating the
2312            x vector for the matrix-vector product y = Ax.
2313 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2314 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2315 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2316            submatrix  (same for all local rows)
2317 .  d_nnz - array containing the number of block nonzeros in the various block rows
2318            in the upper triangular portion of the diagonal portion of the local submatrix
2319            (possibly different for each block row) or NULL.
2320            If you plan to factor the matrix you must leave room for the diagonal entry and
2321            set its value even if it is zero.
2322 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2323            submatrix (same for all local rows).
2324 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2325            off-diagonal portion of the local submatrix (possibly different for
2326            each block row) or NULL.
2327 
2328    Output Parameter:
2329 .  A - the matrix
2330 
2331    Options Database Keys:
2332 +   -mat_no_unroll - uses code that does not unroll the loops in the
2333                      block calculations (much slower)
2334 .   -mat_block_size - size of the blocks to use
2335 -   -mat_mpi - use the parallel matrix data structures even on one processor
2336                (defaults to using SeqBAIJ format on one processor)
2337 
2338    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2339    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2340    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2341 
2342    Notes:
2343    The number of rows and columns must be divisible by blocksize.
2344    This matrix type does not support complex Hermitian operation.
2345 
2346    The user MUST specify either the local or global matrix dimensions
2347    (possibly both).
2348 
2349    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2350    then it must be used on all processors that share the object for that argument.
2351
2352    If the *_nnz parameter is given then the *_nz parameter is ignored.
2353 
2354    Storage Information:
2355    For a square global matrix we define each processor's diagonal portion
2356    to be its local rows and the corresponding columns (a square submatrix);
2357    each processor's off-diagonal portion encompasses the remainder of the
2358    local matrix (a rectangular submatrix).
2359 
2360    The user can specify preallocated storage for the diagonal part of
2361    the local submatrix with either d_nz or d_nnz (not both).  Set
2362    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2363    memory allocation.  Likewise, specify preallocated storage for the
2364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2365 
2366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2367    the figure below we depict these three local rows and all columns (0-11).
2368 
2369 .vb
2370            0 1 2 3 4 5 6 7 8 9 10 11
2371           --------------------------
2372    row 3  |. . . d d d o o o o  o  o
2373    row 4  |. . . d d d o o o o  o  o
2374    row 5  |. . . d d d o o o o  o  o
2375           --------------------------
2376 .ve
2377 
2378    Thus, any entries in the d locations are stored in the d (diagonal)
2379    submatrix, and any entries in the o locations are stored in the
2380    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2381    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2382 
2383    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2384    plus the diagonal part of the d matrix,
2385    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2386    In general, for PDE problems in which most nonzeros are near the diagonal,
2387    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2388    or you will get TERRIBLE performance; see the users' manual chapter on
2389    matrices.
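
   A minimal call sketch (illustrative; m and the nonzero counts are placeholders):
.vb
   Mat A;
   MatCreateSBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,2,NULL,&A);
   /* ... insert upper triangular entries and assemble ... */
   MatDestroy(&A);
.ve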
2390 
2391    Level: intermediate
2392 
2393 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2394 @*/
2395 
2396 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2397 {
2398   PetscMPIInt    size;
2399 
2400   PetscFunctionBegin;
2401   PetscCall(MatCreate(comm,A));
2402   PetscCall(MatSetSizes(*A,m,n,M,N));
2403   PetscCallMPI(MPI_Comm_size(comm,&size));
2404   if (size > 1) {
2405     PetscCall(MatSetType(*A,MATMPISBAIJ));
2406     PetscCall(MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz));
2407   } else {
2408     PetscCall(MatSetType(*A,MATSEQSBAIJ));
2409     PetscCall(MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz));
2410   }
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2415 {
2416   Mat            mat;
2417   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2418   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2419   PetscScalar    *array;
2420 
2421   PetscFunctionBegin;
2422   *newmat = NULL;
2423 
2424   PetscCall(MatCreate(PetscObjectComm((PetscObject)matin),&mat));
2425   PetscCall(MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N));
2426   PetscCall(MatSetType(mat,((PetscObject)matin)->type_name));
2427   PetscCall(PetscLayoutReference(matin->rmap,&mat->rmap));
2428   PetscCall(PetscLayoutReference(matin->cmap,&mat->cmap));
2429 
2430   mat->factortype   = matin->factortype;
2431   mat->preallocated = PETSC_TRUE;
2432   mat->assembled    = PETSC_TRUE;
2433   mat->insertmode   = NOT_SET_VALUES;
2434 
2435   a      = (Mat_MPISBAIJ*)mat->data;
2436   a->bs2 = oldmat->bs2;
2437   a->mbs = oldmat->mbs;
2438   a->nbs = oldmat->nbs;
2439   a->Mbs = oldmat->Mbs;
2440   a->Nbs = oldmat->Nbs;
2441 
2442   a->size         = oldmat->size;
2443   a->rank         = oldmat->rank;
2444   a->donotstash   = oldmat->donotstash;
2445   a->roworiented  = oldmat->roworiented;
2446   a->rowindices   = NULL;
2447   a->rowvalues    = NULL;
2448   a->getrowactive = PETSC_FALSE;
2449   a->barray       = NULL;
2450   a->rstartbs     = oldmat->rstartbs;
2451   a->rendbs       = oldmat->rendbs;
2452   a->cstartbs     = oldmat->cstartbs;
2453   a->cendbs       = oldmat->cendbs;
2454 
2455   /* hash table stuff */
2456   a->ht           = NULL;
2457   a->hd           = NULL;
2458   a->ht_size      = 0;
2459   a->ht_flag      = oldmat->ht_flag;
2460   a->ht_fact      = oldmat->ht_fact;
2461   a->ht_total_ct  = 0;
2462   a->ht_insert_ct = 0;
2463 
2464   PetscCall(PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2));
2465   if (oldmat->colmap) {
2466 #if defined(PETSC_USE_CTABLE)
2467     PetscCall(PetscTableCreateCopy(oldmat->colmap,&a->colmap));
2468 #else
2469     PetscCall(PetscMalloc1(a->Nbs,&a->colmap));
2470     PetscCall(PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt)));
2471     PetscCall(PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs));
2472 #endif
2473   } else a->colmap = NULL;
2474 
2475   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2476     PetscCall(PetscMalloc1(len,&a->garray));
2477     PetscCall(PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt)));
2478     PetscCall(PetscArraycpy(a->garray,oldmat->garray,len));
2479   } else a->garray = NULL;
2480 
2481   PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash));
2482   PetscCall(VecDuplicate(oldmat->lvec,&a->lvec));
2483   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec));
2484   PetscCall(VecScatterCopy(oldmat->Mvctx,&a->Mvctx));
2485   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx));
2486 
2487   PetscCall(VecDuplicate(oldmat->slvec0,&a->slvec0));
2488   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2489   PetscCall(VecDuplicate(oldmat->slvec1,&a->slvec1));
2490   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2491 
2492   PetscCall(VecGetLocalSize(a->slvec1,&nt));
2493   PetscCall(VecGetArray(a->slvec1,&array));
2494   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a));
2495   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b));
2496   PetscCall(VecRestoreArray(a->slvec1,&array));
2497   PetscCall(VecGetArray(a->slvec0,&array));
2498   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b));
2499   PetscCall(VecRestoreArray(a->slvec0,&array));
2500   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0));
2501   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1));
2502   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b));
2503   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a));
2504   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b));
2505 
2506   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2507   PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2508   a->sMvctx = oldmat->sMvctx;
2509   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx));
2510 
2511   PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
2512   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
2513   PetscCall(MatDuplicate(oldmat->B,cpvalues,&a->B));
2514   PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B));
2515   PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist));
2516   *newmat = mat;
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2521 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2522 
2523 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2524 {
2525   PetscBool      isbinary;
2526 
2527   PetscFunctionBegin;
2528   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
2529   PetscCheck(isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2530   PetscCall(MatLoad_MPISBAIJ_Binary(mat,viewer));
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 /*XXXXX@
2535    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2536 
2537    Input Parameters:
2538 .  mat  - the matrix
2539 .  fact - factor
2540 
2541    Not Collective on Mat, each process can have a different hash factor
2542 
2543    Level: advanced
2544 
2545   Notes:
2546    This can also be set by the command line option: -mat_use_hash_table fact
2547 
2548 .seealso: MatSetOption()
2549 @XXXXX*/
2550 
2551 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2552 {
2553   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2554   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2555   PetscReal      atmp;
2556   PetscReal      *work,*svalues,*rvalues;
2557   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2558   PetscMPIInt    rank,size;
2559   PetscInt       *rowners_bs,dest,count,source;
2560   PetscScalar    *va;
2561   MatScalar      *ba;
2562   MPI_Status     stat;
2563 
2564   PetscFunctionBegin;
2565   PetscCheck(!idx,PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2566   PetscCall(MatGetRowMaxAbs(a->A,v,NULL));
2567   PetscCall(VecGetArray(v,&va));
2568 
2569   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
2570   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank));
2571 
2572   bs  = A->rmap->bs;
2573   mbs = a->mbs;
2574   Mbs = a->Mbs;
2575   ba  = b->a;
2576   bi  = b->i;
2577   bj  = b->j;
2578 
2579   /* find ownerships */
2580   rowners_bs = A->rmap->range;
2581 
2582   /* each proc creates an array to be distributed */
2583   PetscCall(PetscCalloc1(bs*Mbs,&work));
2584 
2585   /* row_max for B */
2586   if (rank != size-1) {
2587     for (i=0; i<mbs; i++) {
2588       ncols = bi[1] - bi[0]; bi++;
2589       brow  = bs*i;
2590       for (j=0; j<ncols; j++) {
2591         bcol = bs*(*bj);
2592         for (kcol=0; kcol<bs; kcol++) {
2593           col  = bcol + kcol;                /* local col index */
2594           col += rowners_bs[rank+1];      /* global col index */
2595           for (krow=0; krow<bs; krow++) {
2596             atmp = PetscAbsScalar(*ba); ba++;
2597             row  = brow + krow;   /* local row index */
2598             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2599             if (work[col] < atmp) work[col] = atmp;
2600           }
2601         }
2602         bj++;
2603       }
2604     }
2605 
2606     /* send values to its owners */
2607     for (dest=rank+1; dest<size; dest++) {
2608       svalues = work + rowners_bs[dest];
2609       count   = rowners_bs[dest+1]-rowners_bs[dest];
2610       PetscCallMPI(MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A)));
2611     }
2612   }
2613 
2614   /* receive values */
2615   if (rank) {
2616     rvalues = work;
2617     count   = rowners_bs[rank+1]-rowners_bs[rank];
2618     for (source=0; source<rank; source++) {
2619       PetscCallMPI(MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat));
2620       /* process values */
2621       for (i=0; i<count; i++) {
2622         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2623       }
2624     }
2625   }
2626 
2627   PetscCall(VecRestoreArray(v,&va));
2628   PetscCall(PetscFree(work));
2629   PetscFunctionReturn(0);
2630 }
2631 
2632 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2633 {
2634   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2635   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2636   PetscScalar       *x,*ptr,*from;
2637   Vec               bb1;
2638   const PetscScalar *b;
2639 
2640   PetscFunctionBegin;
2641   PetscCheck(its > 0 && lits > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
2642   PetscCheck(bs <= 1,PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2643 
2644   if (flag == SOR_APPLY_UPPER) {
2645     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2646     PetscFunctionReturn(0);
2647   }
2648 
2649   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2650     if (flag & SOR_ZERO_INITIAL_GUESS) {
2651       PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx));
2652       its--;
2653     }
2654 
2655     PetscCall(VecDuplicate(bb,&bb1));
2656     while (its--) {
2657 
2658       /* lower triangular part: slvec0b = - B^T*xx */
2659       PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2660 
2661       /* copy xx into slvec0a */
2662       PetscCall(VecGetArray(mat->slvec0,&ptr));
2663       PetscCall(VecGetArray(xx,&x));
2664       PetscCall(PetscArraycpy(ptr,x,bs*mbs));
2665       PetscCall(VecRestoreArray(mat->slvec0,&ptr));
2666 
2667       PetscCall(VecScale(mat->slvec0,-1.0));
2668 
2669       /* copy bb into slvec1a */
2670       PetscCall(VecGetArray(mat->slvec1,&ptr));
2671       PetscCall(VecGetArrayRead(bb,&b));
2672       PetscCall(PetscArraycpy(ptr,b,bs*mbs));
2673       PetscCall(VecRestoreArray(mat->slvec1,&ptr));
2674 
2675       /* set slvec1b = 0 */
2676       PetscCall(VecSet(mat->slvec1b,0.0));
2677 
2678       PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2679       PetscCall(VecRestoreArray(xx,&x));
2680       PetscCall(VecRestoreArrayRead(bb,&b));
2681       PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2682 
2683       /* upper triangular part: bb1 = bb1 - B*x */
2684       PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1));
2685 
2686       /* local diagonal sweep */
2687       PetscCall((*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx));
2688     }
2689     PetscCall(VecDestroy(&bb1));
2690   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2691     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2692   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2693     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx));
2694   } else if (flag & SOR_EISENSTAT) {
2695     Vec               xx1;
2696     PetscBool         hasop;
2697     const PetscScalar *diag;
2698     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2699     PetscInt          i,n;
2700 
2701     if (!mat->xx1) {
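    /* Descriptive note on the branch below: Eisenstat's trick applies the SSOR-preconditioned
       operator implicitly, via one local backward and one local forward triangular sweep plus
       a diagonal correction scaled by (omega-2)/omega, instead of an explicit MatMult. */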
2702       PetscCall(VecDuplicate(bb,&mat->xx1));
2703       PetscCall(VecDuplicate(bb,&mat->bb1));
2704     }
2705     xx1 = mat->xx1;
2706     bb1 = mat->bb1;
2707 
2708     PetscCall((*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx));
2709 
2710     if (!mat->diag) {
2711       /* note: this cached diagonal is wrong if the same matrix is reused with new nonzero values */
2712       PetscCall(MatCreateVecs(matin,&mat->diag,NULL));
2713       PetscCall(MatGetDiagonal(matin,mat->diag));
2714     }
2715     PetscCall(MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop));
2716 
2717     if (hasop) {
2718       PetscCall(MatMultDiagonalBlock(matin,xx,bb1));
2719       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2720     } else {
2721       /*
2722           These two lines are replaced by code that may be a bit faster for a good compiler
2723       PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2724       PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2725       */
2726       PetscCall(VecGetArray(mat->slvec1a,&sl));
2727       PetscCall(VecGetArrayRead(mat->diag,&diag));
2728       PetscCall(VecGetArrayRead(bb,&b));
2729       PetscCall(VecGetArray(xx,&x));
2730       PetscCall(VecGetLocalSize(xx,&n));
2731       if (omega == 1.0) {
2732         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2733         PetscCall(PetscLogFlops(2.0*n));
2734       } else {
2735         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2736         PetscCall(PetscLogFlops(3.0*n));
2737       }
2738       PetscCall(VecRestoreArray(mat->slvec1a,&sl));
2739       PetscCall(VecRestoreArrayRead(mat->diag,&diag));
2740       PetscCall(VecRestoreArrayRead(bb,&b));
2741       PetscCall(VecRestoreArray(xx,&x));
2742     }
2743 
2744     /* multiply off-diagonal portion of matrix */
2745     PetscCall(VecSet(mat->slvec1b,0.0));
2746     PetscCall((*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b));
2747     PetscCall(VecGetArray(mat->slvec0,&from));
2748     PetscCall(VecGetArray(xx,&x));
2749     PetscCall(PetscArraycpy(from,x,bs*mbs));
2750     PetscCall(VecRestoreArray(mat->slvec0,&from));
2751     PetscCall(VecRestoreArray(xx,&x));
2752     PetscCall(VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2753     PetscCall(VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD));
2754     PetscCall((*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a));
2755 
2756     /* local sweep */
2757     PetscCall((*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1));
2758     PetscCall(VecAXPY(xx,1.0,xx1));
2759   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2760   PetscFunctionReturn(0);
2761 }
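
/*
   MatSOR_MPISBAIJ is normally reached through PCSOR (for example -pc_type sor) or a
   direct MatSOR() call.  A minimal sketch of the latter, assuming an assembled
   MPISBAIJ matrix A and conforming vectors b and x:

     PetscCall(MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x));
*/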
2762 
2763 /*@
2764      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
2765          the local rows in standard CSR format.
2766 
2767    Collective
2768 
2769    Input Parameters:
2770 +  comm - MPI communicator
2771 .  bs - the block size, only a block size of 1 is supported
2772 .  m - number of local rows (Cannot be PETSC_DECIDE)
2773 .  n - number of local columns; this should be the same as the local size used in creating the
2774        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2775        calculated if N is given). For square matrices n is almost always m.
2776 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2777 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2778 .   i - row indices; that is i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row row-1 of the matrix
2779 .   j - column indices
2780 -   a - matrix values
2781 
2782    Output Parameter:
2783 .   mat - the matrix
2784 
2785    Level: intermediate
2786 
2787    Notes:
2788        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2789      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2790      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2791 
2792        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
2793 
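   Example (a minimal sketch, assuming bs = 1 and a single MPI rank that owns all three
   rows of the symmetric tridiagonal matrix tridiag(-1,2,-1); only the upper triangle
   is passed, since entries below the diagonal are ignored):
.vb
   PetscInt    i[] = {0,2,4,5};       /* row pointers                    */
   PetscInt    j[] = {0,1, 1,2, 2};   /* sorted upper-triangular columns */
   PetscScalar a[] = {2,-1, 2,-1, 2}; /* values matching j[]             */
   Mat         A;
   PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A));
.ve
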
2794 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2795           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2796 @*/
2797 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2798 {
2799   PetscFunctionBegin;
2800   PetscCheck(!i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2801   PetscCheck(m >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2802   PetscCall(MatCreate(comm,mat));
2803   PetscCall(MatSetSizes(*mat,m,n,M,N));
2804   PetscCall(MatSetType(*mat,MATMPISBAIJ));
2805   PetscCall(MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a));
2806   PetscFunctionReturn(0);
2807 }
2808 
2809 /*@C
2810    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2811 
2812    Collective
2813 
2814    Input Parameters:
2815 +  B - the matrix
2816 .  bs - the block size
2817 .  i - the indices into j for the start of each local row (starts with zero)
2818 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2819 -  v - optional values in the matrix
2820 
2821    Level: advanced
2822 
2823    Notes:
2824    Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2825    and usually the numerical values as well
2826 
2827    Any entries below the diagonal are ignored
2828 
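   A minimal sketch of the usual call sequence (assuming bs = 1 and CSR arrays i, j, v
   that describe this process's rows, as for MatCreateMPISBAIJWithArrays() above):
.vb
   PetscCall(MatCreate(comm,&B));
   PetscCall(MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
   PetscCall(MatSetType(B,MATMPISBAIJ));
   PetscCall(MatMPISBAIJSetPreallocationCSR(B,1,i,j,v));
.ve
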
2829 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2830 @*/
2831 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2832 {
2833   PetscFunctionBegin;
2834   PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));
2835   PetscFunctionReturn(0);
2836 }
2837 
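/*
   Concatenates each process's sequential SBAIJ matrix inmat into a single parallel
   SBAIJ matrix *outmat.  For MAT_INITIAL_MATRIX a symbolic phase first computes the
   block preallocation with the MatPreallocate macros; the numeric phase then copies
   every row over with MatSetValues().
*/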
2838 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2839 {
2840   PetscErrorCode ierr;
2841   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2842   PetscInt       *indx;
2843   PetscScalar    *values;
2844 
2845   PetscFunctionBegin;
2846   PetscCall(MatGetSize(inmat,&m,&N));
2847   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2848     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2849     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2850     PetscInt       *bindx,rmax=a->rmax,j;
2851     PetscMPIInt    rank,size;
2852 
2853     PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2854     mbs = m/bs; Nbs = N/cbs;
2855     if (n == PETSC_DECIDE) {
2856       PetscCall(PetscSplitOwnershipBlock(comm,cbs,&n,&N));
2857     }
2858     nbs = n/cbs;
2859 
2860     PetscCall(PetscMalloc1(rmax,&bindx));
2861     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);PetscCall(ierr); /* macro whose local outputs __end and __rstart are used below */
2862 
2863     PetscCallMPI(MPI_Comm_rank(comm,&rank));
2864     PetscCallMPI(MPI_Comm_size(comm,&size));
2865     if (rank == size-1) {
2866       /* Check sum(nbs) = Nbs */
2867       PetscCheck(__end == Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs);
2868     }
2869 
2870     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2871     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2872     for (i=0; i<mbs; i++) {
2873       PetscCall(MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL)); /* non-blocked nnz and indx */
2874       nnz  = nnz/bs;
2875       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2876       PetscCall(MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz));
2877       PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL));
2878     }
2879     PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2880     PetscCall(PetscFree(bindx));
2881 
2882     PetscCall(MatCreate(comm,outmat));
2883     PetscCall(MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE));
2884     PetscCall(MatSetBlockSizes(*outmat,bs,cbs));
2885     PetscCall(MatSetType(*outmat,MATSBAIJ));
2886     PetscCall(MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz));
2887     PetscCall(MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz));
2888     ierr = MatPreallocateFinalize(dnz,onz);PetscCall(ierr);
2889   }
2890 
2891   /* numeric phase */
2892   PetscCall(MatGetBlockSizes(inmat,&bs,&cbs));
2893   PetscCall(MatGetOwnershipRange(*outmat,&rstart,NULL));
2894 
2895   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE));
2896   for (i=0; i<m; i++) {
2897     PetscCall(MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2898     Ii   = i + rstart;
2899     PetscCall(MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES));
2900     PetscCall(MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values));
2901   }
2902   PetscCall(MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE));
2903   PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY));
2904   PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY));
2905   PetscFunctionReturn(0);
2906 }
2907