xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d2fd7bfc6f0fd2e1d083decbb7cc7d77e16824f0)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
/*
  Generic fallback for converting an MPISBAIJ matrix (which stores only the
  upper triangle) to another matrix type by expanding it into a full
  (symmetric) matrix B and copying values row by row.

  reuse semantics:
    MAT_INITIAL_MATRIX / MAT_INPLACE_MATRIX - a new B is created and (for
      non-dense targets) preallocated from A's symmetrized pattern;
    MAT_REUSE_MATRIX - *newmat is zeroed and refilled.
  For MAT_INPLACE_MATRIX, B replaces A's header at the end.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    /* dense targets need no preallocation, just MatSetUp() */
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      /* GetRowUpperTriangular enables MatGetRow() on the sbaij storage;
         symm = PETSC_TRUE makes the preallocator union in the transposed pattern */
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    /* insert each stored (upper-triangular) row and its transpose so B holds
       the full symmetric matrix; INSERT_VALUES makes the diagonal overlap harmless */
    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
112 
113 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
114 {
115   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
116   PetscErrorCode ierr;
117 
118   PetscFunctionBegin;
119   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
120   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
125 {
126   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
131   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
/*
  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol):
  insert (INSERT_VALUES) or add (ADD_VALUES) the scalar `value` at local
  point indices (row,col) of the diagonal SeqSBAIJ block A.  (orow,ocol)
  are the original global indices, used only in the error message.

  This is a macro, not a function: it relies on the caller
  (MatSetValues_MPISBAIJ) having bs, bs2, a, A, ierr and the scratch
  variables brow,bcol,ridx,cidx,rp,ap,rmax,nrow,low,high,t,_i,bap,N as well
  as the aliases aimax/ai/ailen/aj/aa of a's CSR arrays in scope.  It finds
  the block column by a truncated binary search plus linear scan; if the
  block is absent it either ignores the entry (a->nonew == 1), errors
  (a->nonew == -1), or reallocates the row and shifts later entries up to
  make room for a new zeroed block.
*/
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
171 
/*
  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol):
  same as MatSetValues_SeqSBAIJ_A_Private but for the off-diagonal SeqBAIJ
  block B.  Relies on the caller having b, B, and the aliases
  bimax/bi/bilen/bj/ba of b's CSR arrays in scope (plus the same scratch
  variables); because MatDisAssemble_MPISBAIJ() can replace B, the caller
  must refresh those aliases after a disassembly before using this macro.
*/
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
208 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  /* Grow the persistent scratch arrays used when stashing off-process rows;
     they hold the column indices/values of a row after lower-triangular
     entries have been filtered out. */
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row index = skip */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        /* only the upper block triangle is stored: a global entry whose block
           row exceeds its block column is either ignored or an error */
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* matrix was previously assembled: translate the global block
               column to B's compressed local numbering via colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to global numbering
                 by disassembling, which replaces baij->B */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* filter out lower-triangular entries before stashing the row for
           communication during assembly */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
329 
/*
  Insert or add one bs-by-bs block of values at block indices (row,col) of a
  SeqSBAIJ matrix; inlined fast path used by MatSetValuesBlocked_MPISBAIJ().
  v points to bs2 scalars laid out according to a->roworiented; (orow,ocol)
  are the original global block indices, used only for error messages.
  Lower-triangular blocks (col < row) are ignored or rejected depending on
  a->ignore_ltriangular.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  /* point at the block row's column indices and value storage */
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* truncated binary search for the block column, finished by a linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      /* block found: copy v into it; stored blocks are column-major, so the
         row-oriented input is transposed via the stride-bs inner loop */
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  /* block not present: grow the row if needed, shift later blocks up and insert */
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
421 
422 /*
423    This routine is exactly duplicated in mpibaij.c
424 */
/*
   This routine is exactly duplicated in mpibaij.c

   Insert or add one bs-by-bs block of values at block indices (row,col) of a
   SeqBAIJ matrix (the off-diagonal part); same search/insert strategy as
   MatSetValuesBlocked_SeqSBAIJ_Inlined() but without the triangular filter.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  /* point at the block row's column indices and value storage */
  rp   = aj + ai[row];
  ap   = aa + bs2*ai[row];
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  /* truncated binary search for the block column, finished by a linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap +  bs2*i;
      /* block found: stored blocks are column-major; a row-oriented input is
         transposed via the stride-bs inner loop */
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  /* block not present: grow the row if needed, shift later blocks up and insert */
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
513 
514 /*
515     This routine could be optimized by removing the need for the block copy below and passing stride information
516   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
517 */
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()

  Insert/add m x n blocks of size bs x bs at block row indices im[] and block
  column indices in[] (global block numbering).  Locally owned blocks go to
  baij->A (diagonal part) or baij->B (off-diagonal part); off-process rows are
  stashed for communication during assembly.  Lower block-triangular entries
  (im[i] > in[j]) are ignored or rejected per ignore_ltriangular.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent bs2 scratch block used for strided copies */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows (or columns) of one block
     inside the caller's m*bs by n*bs value array */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block row = skip */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        /* (barray then aliases the caller's array directly; otherwise the
           bs x bs block is gathered into the scratch buffer) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* translate the global block column into B's compressed local
               numbering; colmap stores point-column offsets, hence the /bs */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* NOTE(review): A's data is cast to Mat_SeqBAIJ* here although A is
               SeqSBAIJ; only the common ->nonew field is read — confirm layouts agree */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: revert to global numbering */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash it for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
621 
/*
  Retrieve entries (idxm[i], idxn[j]) into v (row-major).  Only rows owned by
  this process are supported; columns outside the diagonal block are looked up
  in baij->B via the colmap, and entries not stored there are returned as 0.0.
  Negative row/column indices are skipped.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal column: map the global block column to B's local
             numbering; colmap stores offset+1 so 0 means "not present" */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* cross-check against garray: a stale colmap slot must not be trusted */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
662 
/*
  Compute a norm of a symmetric (SBAIJ) parallel matrix.  Because only the
  upper triangle is stored, the Frobenius norm counts off-diagonal-part
  contributions twice, and the 1-/infinity-norms (equal by symmetry)
  accumulate every stored off-diagonal entry into both its row sum and its
  column sum before taking the global maximum.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the sequential A holds the whole matrix */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B's entries appear twice in the full symmetric matrix, hence 2*sum[1] */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      /* rsum accumulates local column sums over all global columns; rsum2
         receives the global reduction */
      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              /* (stored only once, so also credit the mirrored position) */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          /* garray maps B's compressed block columns back to global blocks */
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
737 
738 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
739 {
740   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
741   PetscErrorCode ierr;
742   PetscInt       nstash,reallocs;
743 
744   PetscFunctionBegin;
745   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
746 
747   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
748   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
749   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
750   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
751   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
752   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
753   PetscFunctionReturn(0);
754 }
755 
/*
   MatAssemblyEnd_MPISBAIJ - Completes assembly of a parallel SBAIJ matrix.

   Drains the point stash and the block stash (entries that arrived from other
   processes via MatAssemblyBegin_MPISBAIJ()), assembles the local diagonal
   block (baij->A) and off-diagonal block (baij->B), synchronizes possible
   disassembly of B across all ranks, and on the first final assembly builds
   the communication contexts used by the matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* receive messages from the point stash until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of booleans: the result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* release the MatGetRow() scratch buffers; they are re-allocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
855 
856 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
857 #include <petscdraw.h>
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - Views the matrix with an ASCII, draw,
   or socket viewer. For ASCII info formats it prints per-rank statistics and
   returns early; otherwise it gathers the entire matrix onto rank 0 and views
   that sequential copy. Collective on mat.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no matrix entries are gathered */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    /* rank 0 owns all rows of the gathered copy; other ranks own none */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs-by-bs block at a time (column-major storage) */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix's diagonal block holds the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
977 
/*
   MatView_MPISBAIJ_Binary - Writes the matrix to a binary viewer in the
   standard PETSc binary matrix format (header, row lengths, column indices,
   values), expanded to point (non-blocked) CSR form.

   Rank 0 writes the file; all other ranks ship their row lengths, column
   indices, and values to rank 0 with flow-controlled point-to-point messages.
   Collective on mat.
*/
static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz); /* local nonzeros in point (unblocked) terms */
  rlen = mat->rmap->n;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    /* header[3] is the global nonzero count, reduced onto rank 0 */
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  /* crow_lens holds per-BLOCK-row lengths; rlen/bs block rows suffice for any rank */
  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    /* expand block-row lengths to point-row lengths: each of the bs point
       rows inside a block row has bs*(block row length) entries */
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
     information needed to make it for each row from a block row. This does require more communication but still not more than
     the communication needed for the nonzero values  */
  nzmax = nz; /*  space a largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    /* B columns left of the diagonal block come first, keeping columns sorted */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    /* then the diagonal block A (stored with block-column offsets from cstart) */
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    /* finally the B columns right of the diagonal block */
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    /* replicate the index pattern for the remaining bs-1 point rows of this block row */
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); /* point-row length */
    /* transpose each column-major bs-by-bs block into the rlen-strided
       point-row layout; same B-left / A / B-right order as the indices */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen; /* skip past the bs-1 point rows filled by the strided writes above */
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  /* record the block size in the .info companion file for MatLoad() */
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
1164 
1165 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1166 {
1167   PetscErrorCode ierr;
1168   PetscBool      iascii,isdraw,issocket,isbinary;
1169 
1170   PetscFunctionBegin;
1171   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1172   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1173   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1174   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1175   if (iascii || isdraw || issocket) {
1176     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1177   } else if (isbinary) {
1178     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
/*
   MatDestroy_MPISBAIJ - Releases every resource owned by the MPISBAIJ
   implementation: the stashes, the local A/B blocks, the column map, the
   scatter contexts and work vectors, scratch arrays, and the composed
   type-specific functions.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* the colmap is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters used by the matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and remove the type-specific composed functions */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1236 
1237 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1238 {
1239   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1240   PetscErrorCode    ierr;
1241   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1242   PetscScalar       *from;
1243   const PetscScalar *x;
1244 
1245   PetscFunctionBegin;
1246   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1247   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1248 
1249   /* diagonal part */
1250   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1251   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1252 
1253   /* subdiagonal part */
1254   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1255 
1256   /* copy x into the vec slvec0 */
1257   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1258   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1259 
1260   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1261   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1262   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1263 
1264   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1266   /* supperdiagonal part */
1267   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1272 {
1273   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1274   PetscErrorCode    ierr;
1275   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1276   PetscScalar       *from;
1277   const PetscScalar *x;
1278 
1279   PetscFunctionBegin;
1280   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1281   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1282 
1283   /* diagonal part */
1284   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1285   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1286 
1287   /* subdiagonal part */
1288   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1289 
1290   /* copy x into the vec slvec0 */
1291   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1292   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1293 
1294   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1295   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1296   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1297 
1298   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1299   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1300   /* supperdiagonal part */
1301   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1306 {
1307   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       nt;
1310 
1311   PetscFunctionBegin;
1312   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1313   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1314 
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1317 
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   /* do diagonal part */
1320   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1321   /* do supperdiagonal part */
1322   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1324   /* do subdiagonal part */
1325   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1326   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1327   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1332 {
1333   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1334   PetscErrorCode    ierr;
1335   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1336   PetscScalar       *from,zero=0.0;
1337   const PetscScalar *x;
1338 
1339   PetscFunctionBegin;
1340   /*
1341   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1342   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1343   */
1344   /* diagonal part */
1345   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1346   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1347 
1348   /* subdiagonal part */
1349   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1350 
1351   /* copy x into the vec slvec0 */
1352   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1353   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1354   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1355   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1356 
1357   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1358   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1359   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1360 
1361   /* supperdiagonal part */
1362   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1367 {
1368   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1373   /* do diagonal part */
1374   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1375   /* do supperdiagonal part */
1376   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1377   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1378 
1379   /* do subdiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 /*
1387   This only works correctly for square matrices where the subblock A->A is the
1388    diagonal block
1389 */
1390 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1391 {
1392   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1397   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1402 {
1403   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1404   PetscErrorCode ierr;
1405 
1406   PetscFunctionBegin;
1407   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1408   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
/*
   MatGetRow_MPISBAIJ - Returns one locally owned (global) row of the matrix,
   merging the row fragments of the diagonal block A and the off-diagonal
   block B into a single list with globally increasing column indices.

   The returned index/value arrays point into per-matrix scratch buffers
   (rowvalues/rowindices) sized for the longest local row; they remain valid
   until MatRestoreRow_MPISBAIJ() is called. Not collective.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* pass NULL for the pieces the caller did not request, except that the
     B column indices are still needed for sorting when values are requested */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the
         diagonal block; layout is then [B-left | A | B-right] */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        /* imark may already be known from the value pass above */
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1490 
1491 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1492 {
1493   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1494 
1495   PetscFunctionBegin;
1496   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1497   baij->getrowactive = PETSC_FALSE;
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1502 {
1503   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1504   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1505 
1506   PetscFunctionBegin;
1507   aA->getrow_utriangular = PETSC_TRUE;
1508   PetscFunctionReturn(0);
1509 }
1510 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1511 {
1512   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1513   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1514 
1515   PetscFunctionBegin;
1516   aA->getrow_utriangular = PETSC_FALSE;
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1521 {
1522   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524 
1525   PetscFunctionBegin;
1526   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1527   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1532 {
1533   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1538   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1543    Input: isrow       - distributed(parallel),
1544           iscol_local - locally owned (seq)
1545 */
1546 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1547 {
1548   PetscErrorCode ierr;
1549   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1550   const PetscInt *ptr1,*ptr2;
1551 
1552   PetscFunctionBegin;
1553   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1554   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1555   if (sz1 > sz2) {
1556     *flg = PETSC_FALSE;
1557     PetscFunctionReturn(0);
1558   }
1559 
1560   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1561   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1562 
1563   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1564   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1565   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1566   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1567   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1568   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1569 
1570   nmatch=0;
1571   k     = 0;
1572   for (i=0; i<sz1; i++){
1573     for (j=k; j<sz2; j++){
1574       if (a1[i] == a2[j]) {
1575         k = j; nmatch++;
1576         break;
1577       }
1578     }
1579   }
1580   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1581   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1582   ierr = PetscFree(a1);CHKERRQ(ierr);
1583   ierr = PetscFree(a2);CHKERRQ(ierr);
1584   if (nmatch < sz1) {
1585     *flg = PETSC_FALSE;
1586   } else {
1587     *flg = PETSC_TRUE;
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
1592 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1593 {
1594   PetscErrorCode ierr;
1595   IS             iscol_local;
1596   PetscInt       csize;
1597   PetscBool      isequal;
1598 
1599   PetscFunctionBegin;
1600   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1601   if (call == MAT_REUSE_MATRIX) {
1602     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1603     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1604   } else {
1605     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1606     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1607     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1608   }
1609 
1610   /* now call MatCreateSubMatrix_MPIBAIJ() */
1611   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1612   if (call == MAT_INITIAL_MATRIX) {
1613     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1614     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1615   }
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1620 {
1621   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1622   PetscErrorCode ierr;
1623 
1624   PetscFunctionBegin;
1625   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1626   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1631 {
1632   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1633   Mat            A  = a->A,B = a->B;
1634   PetscErrorCode ierr;
1635   PetscLogDouble isend[5],irecv[5];
1636 
1637   PetscFunctionBegin;
1638   info->block_size = (PetscReal)matin->rmap->bs;
1639 
1640   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1641 
1642   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1643   isend[3] = info->memory;  isend[4] = info->mallocs;
1644 
1645   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1646 
1647   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1648   isend[3] += info->memory;  isend[4] += info->mallocs;
1649   if (flag == MAT_LOCAL) {
1650     info->nz_used      = isend[0];
1651     info->nz_allocated = isend[1];
1652     info->nz_unneeded  = isend[2];
1653     info->memory       = isend[3];
1654     info->mallocs      = isend[4];
1655   } else if (flag == MAT_GLOBAL_MAX) {
1656     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1657 
1658     info->nz_used      = irecv[0];
1659     info->nz_allocated = irecv[1];
1660     info->nz_unneeded  = irecv[2];
1661     info->memory       = irecv[3];
1662     info->mallocs      = irecv[4];
1663   } else if (flag == MAT_GLOBAL_SUM) {
1664     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1665 
1666     info->nz_used      = irecv[0];
1667     info->nz_allocated = irecv[1];
1668     info->nz_unneeded  = irecv[2];
1669     info->memory       = irecv[3];
1670     info->mallocs      = irecv[4];
1671   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1672   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1673   info->fill_ratio_needed = 0;
1674   info->factor_mallocs    = 0;
1675   PetscFunctionReturn(0);
1676 }
1677 
/* MatSetOption() implementation for MATMPISBAIJ: most options are forwarded
   to the sequential diagonal (a->A, SBAIJ) and off-diagonal (a->B, BAIJ)
   blocks; a few are recorded on the parallel object itself. */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern-related options are meaningful to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* remembered on the parallel level and forwarded to both blocks */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* stash behavior lives only on the parallel level */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* in complex arithmetic a Hermitian matrix needs its own MatMult kernel */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
#endif
    break;
  case MAT_SPD:
    /* SPD implies (structural) symmetry, so set those flags as well */
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* the SBAIJ format is symmetric by construction and cannot stop being so */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): sets the same flag as MAT_IGNORE_LOWER_TRIANGULAR;
       there appears to be no separate "error" flag -- confirm this is intended */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1757 
1758 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1759 {
1760   PetscErrorCode ierr;
1761 
1762   PetscFunctionBegin;
1763   if (reuse == MAT_INITIAL_MATRIX) {
1764     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1765   }  else if (reuse == MAT_REUSE_MATRIX) {
1766     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1767   }
1768   PetscFunctionReturn(0);
1769 }
1770 
/* Symmetric diagonal scaling.  Because the format stores only half the
   matrix, the left and right scaling vectors must be (numerically) the
   same; the result is diag(ll) * A * diag(ll). */
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    /* distinct Vec objects are still accepted as long as their values agree */
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  /* no scaling vector given: nothing to do */
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start gathering the ghost entries of rr into baij->lvec now; the local
     scalings below deliberately overlap this communication */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part (needs the ghost values) */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1805 
1806 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1807 {
1808   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1809   PetscErrorCode ierr;
1810 
1811   PetscFunctionBegin;
1812   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1817 
1818 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1819 {
1820   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1821   Mat            a,b,c,d;
1822   PetscBool      flg;
1823   PetscErrorCode ierr;
1824 
1825   PetscFunctionBegin;
1826   a = matA->A; b = matA->B;
1827   c = matB->A; d = matB->B;
1828 
1829   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1830   if (flg) {
1831     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1832   }
1833   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1838 {
1839   PetscErrorCode ierr;
1840   PetscBool      isbaij;
1841 
1842   PetscFunctionBegin;
1843   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1844   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1845   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1846   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1847     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1848     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1849     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1850   } else {
1851     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1852     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1853 
1854     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1855     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1856   }
1857   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1862 {
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
/* Y = a*X + Y for MPISBAIJ: a raw BLAS axpy on the value arrays when the
   nonzero patterns coincide, MatAXPY_Basic() when X's pattern is a subset
   of Y's, and otherwise a freshly preallocated union-pattern matrix that
   replaces Y's innards. */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: the value arrays of the blocks line up entry by
       entry, so a plain axpy over each array suffices */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    /* the basic kernel reads X row by row, which needs upper-triangular access */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    /* unrelated patterns: build a new matrix preallocated for the union of
       both patterns, compute into it, then swap it into Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* NOTE(review): the counts are per block row but the arrays are sized
       with rmap->N (scalar rows), over-allocating by a factor of bs --
       presumably harmless; confirm against MatAXPYGetPreallocation_SeqSBAIJ() */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* B's guts replace Y's; B itself is consumed by the header replace */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1919 
1920 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1921 {
1922   PetscErrorCode ierr;
1923   PetscInt       i;
1924   PetscBool      flg;
1925 
1926   PetscFunctionBegin;
1927   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1928   for (i=0; i<n; i++) {
1929     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1930     if (!flg) {
1931       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1932     }
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1938 {
1939   PetscErrorCode ierr;
1940   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1941   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1942 
1943   PetscFunctionBegin;
1944   if (!Y->preallocated) {
1945     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1946   } else if (!aij->nz) {
1947     PetscInt nonew = aij->nonew;
1948     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1949     aij->nonew = nonew;
1950   }
1951   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1956 {
1957   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1958   PetscErrorCode ierr;
1959 
1960   PetscFunctionBegin;
1961   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1962   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1963   if (d) {
1964     PetscInt rstart;
1965     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1966     *d += rstart/A->rmap->bs;
1967 
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1973 {
1974   PetscFunctionBegin;
1975   *a = ((Mat_MPISBAIJ*)A->data)->A;
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 /* -------------------------------------------------------------------*/
/* Function dispatch table for MATMPISBAIJ.  Slot numbers follow the
   _MatOps layout in matimpl.h; zero entries are operations this matrix
   type does not implement.  Note the (conjugate-free) transpose kernels
   reuse the plain ones because the matrix is symmetric. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       MatConvert_MPISBAIJ_Basic,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
2126 
2127 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2128 {
2129   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
2130   PetscErrorCode ierr;
2131   PetscInt       i,mbs,Mbs;
2132   PetscMPIInt    size;
2133 
2134   PetscFunctionBegin;
2135   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2136   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2137   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2138   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2139   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
2140   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
2141 
2142   mbs = B->rmap->n/bs;
2143   Mbs = B->rmap->N/bs;
2144   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2145 
2146   B->rmap->bs = bs;
2147   b->bs2      = bs*bs;
2148   b->mbs      = mbs;
2149   b->Mbs      = Mbs;
2150   b->nbs      = B->cmap->n/bs;
2151   b->Nbs      = B->cmap->N/bs;
2152 
2153   for (i=0; i<=b->size; i++) {
2154     b->rangebs[i] = B->rmap->range[i]/bs;
2155   }
2156   b->rstartbs = B->rmap->rstart/bs;
2157   b->rendbs   = B->rmap->rend/bs;
2158 
2159   b->cstartbs = B->cmap->rstart/bs;
2160   b->cendbs   = B->cmap->rend/bs;
2161 
2162 #if defined(PETSC_USE_CTABLE)
2163   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2164 #else
2165   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2166 #endif
2167   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2168   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2169   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2170   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2171   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2172   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2173   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2174   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2175   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2176 
2177   /* Because the B will have been resized we simply destroy it and create a new one each time */
2178   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2179   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2180   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2181   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2182   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2183   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2184 
2185   if (!B->preallocated) {
2186     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2187     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2188     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2189     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2190     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2191   }
2192 
2193   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2194   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2195 
2196   B->preallocated  = PETSC_TRUE;
2197   B->was_assembled = PETSC_FALSE;
2198   B->assembled     = PETSC_FALSE;
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2203 {
2204   PetscInt       m,rstart,cend;
2205   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2206   const PetscInt *JJ    =0;
2207   PetscScalar    *values=0;
2208   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2209   PetscErrorCode ierr;
2210   PetscBool      nooffprocentries;
2211 
2212   PetscFunctionBegin;
2213   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2214   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2215   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2216   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2217   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2218   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2219   m      = B->rmap->n/bs;
2220   rstart = B->rmap->rstart/bs;
2221   cend   = B->cmap->rend/bs;
2222 
2223   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2224   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2225   for (i=0; i<m; i++) {
2226     nz = ii[i+1] - ii[i];
2227     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2228     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2229     JJ     = jj + ii[i];
2230     bd     = 0;
2231     for (j=0; j<nz; j++) {
2232       if (*JJ >= i + rstart) break;
2233       JJ++;
2234       bd++;
2235     }
2236     d  = 0;
2237     for (; j<nz; j++) {
2238       if (*JJ++ >= cend) break;
2239       d++;
2240     }
2241     d_nnz[i] = d;
2242     o_nnz[i] = nz - d - bd;
2243     nz       = nz - bd;
2244     nz_max = PetscMax(nz_max,nz);
2245   }
2246   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2247   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2248   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2249 
2250   values = (PetscScalar*)V;
2251   if (!values) {
2252     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2253   }
2254   for (i=0; i<m; i++) {
2255     PetscInt          row    = i + rstart;
2256     PetscInt          ncols  = ii[i+1] - ii[i];
2257     const PetscInt    *icols = jj + ii[i];
2258     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2259       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2260       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2261     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2262       PetscInt j;
2263       for (j=0; j<ncols; j++) {
2264         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2265         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2266       }
2267     }
2268   }
2269 
2270   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2271   nooffprocentries    = B->nooffprocentries;
2272   B->nooffprocentries = PETSC_TRUE;
2273   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2274   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2275   B->nooffprocentries = nooffprocentries;
2276 
2277   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 /*MC
2282    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2283    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2284    the matrix is stored.
2285 
2286    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2287    can call MatSetOption(Mat, MAT_HERMITIAN);
2288 
2289    Options Database Keys:
2290 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2291 
2292    Notes:
     The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2295 
2296    Level: beginner
2297 
2298 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MatType
2299 M*/
2300 
/*
   MatCreate_MPISBAIJ - Type constructor for MATMPISBAIJ. Allocates the Mat_MPISBAIJ
   data structure, installs the operation table, registers type-specific methods and
   conversions, and marks the matrix symmetric. Collective over B's communicator.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  /* install the shared operation table (defined earlier in this file) */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;  /* global-to-local column map, built lazily on first assembly */
  b->garray      = NULL;  /* global indices of off-diagonal block columns, built lazily */
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply; created during preallocation/assembly */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* register type-specific methods so the generic front-end routines can dispatch here */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* SBAIJ stores only the upper triangle, so symmetry is a structural property of the type */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;

  /* symmetric, not Hermitian: for complex scalars the transpose is NOT conjugated */
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* factor must exceed 1.0; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2398 
2399 /*MC
2400    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2401 
2402    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2403    and MATMPISBAIJ otherwise.
2404 
2405    Options Database Keys:
2406 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2407 
2408   Level: beginner
2409 
2410 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2411 M*/
2412 
2413 /*@C
2414    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2415    the user should preallocate the matrix storage by setting the parameters
2416    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2417    performance can be increased by more than a factor of 50.
2418 
2419    Collective on Mat
2420 
2421    Input Parameters:
2422 +  B - the matrix
2423 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2424           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2425 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2426            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
           for the diagonal entry and set a value even if it is zero.
2431 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2432            submatrix (same for all local rows).
2433 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2434            off-diagonal portion of the local submatrix that is right of the diagonal
2435            (possibly different for each block row) or NULL.
2436 
2437 
2438    Options Database Keys:
2439 +   -mat_no_unroll - uses code that does not unroll the loops in the
2440                      block calculations (much slower)
2441 -   -mat_block_size - size of the blocks to use
2442 
2443    Notes:
2444 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2447 
2448    If the *_nnz parameter is given then the *_nz parameter is ignored
2449 
2450    Storage Information:
2451    For a square global matrix we define each processor's diagonal portion
2452    to be its local rows and the corresponding columns (a square submatrix);
2453    each processor's off-diagonal portion encompasses the remainder of the
2454    local matrix (a rectangular submatrix).
2455 
2456    The user can specify preallocated storage for the diagonal part of
2457    the local submatrix with either d_nz or d_nnz (not both).  Set
2458    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2459    memory allocation.  Likewise, specify preallocated storage for the
2460    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2461 
2462    You can call MatGetInfo() to get information on how effective the preallocation was;
2463    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2464    You can also run with the option -info and look for messages with the string
2465    malloc in them to see if additional memory allocation was needed.
2466 
2467    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2468    the figure below we depict these three local rows and all columns (0-11).
2469 
2470 .vb
2471            0 1 2 3 4 5 6 7 8 9 10 11
2472           --------------------------
2473    row 3  |. . . d d d o o o o  o  o
2474    row 4  |. . . d d d o o o o  o  o
2475    row 5  |. . . d d d o o o o  o  o
2476           --------------------------
2477 .ve
2478 
2479    Thus, any entries in the d locations are stored in the d (diagonal)
2480    submatrix, and any entries in the o locations are stored in the
2481    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2482    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2483 
2484    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2485    plus the diagonal part of the d matrix,
2486    and o_nz should indicate the number of block nonzeros per row in the o matrix
2487 
2488    In general, for PDE problems in which most nonzeros are near the diagonal,
2489    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2490    or you will get TERRIBLE performance; see the users' manual chapter on
2491    matrices.
2492 
2493    Level: intermediate
2494 
2495 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2496 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate collectively: bs must be identical on every process sharing B */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the implementation composed on B (MatMPISBAIJSetPreallocation_MPISBAIJ
     for this type); PetscTryMethod is a no-op for matrix types that do not provide it */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2508 
2509 /*@C
2510    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2511    (block compressed row).  For good matrix assembly performance
2512    the user should preallocate the matrix storage by setting the parameters
2513    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2514    performance can be increased by more than a factor of 50.
2515 
2516    Collective
2517 
2518    Input Parameters:
2519 +  comm - MPI communicator
2520 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2521           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2522 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2523            This value should be the same as the local size used in creating the
2524            y vector for the matrix-vector product y = Ax.
2525 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2526            This value should be the same as the local size used in creating the
2527            x vector for the matrix-vector product y = Ax.
2528 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2529 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2530 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2531            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
           If you plan to factor the matrix you must leave room for the diagonal entry and
           set its value even if it is zero.
2537 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2538            submatrix (same for all local rows).
2539 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2540            off-diagonal portion of the local submatrix (possibly different for
2541            each block row) or NULL.
2542 
2543    Output Parameter:
2544 .  A - the matrix
2545 
2546    Options Database Keys:
2547 +   -mat_no_unroll - uses code that does not unroll the loops in the
2548                      block calculations (much slower)
2549 .   -mat_block_size - size of the blocks to use
2550 -   -mat_mpi - use the parallel matrix data structures even on one processor
2551                (defaults to using SeqBAIJ format on one processor)
2552 
2553    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2554    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2555    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2556 
2557    Notes:
2558    The number of rows and columns must be divisible by blocksize.
2559    This matrix type does not support complex Hermitian operation.
2560 
2561    The user MUST specify either the local or global matrix dimensions
2562    (possibly both).
2563 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2566 
2567    If the *_nnz parameter is given then the *_nz parameter is ignored
2568 
2569    Storage Information:
2570    For a square global matrix we define each processor's diagonal portion
2571    to be its local rows and the corresponding columns (a square submatrix);
2572    each processor's off-diagonal portion encompasses the remainder of the
2573    local matrix (a rectangular submatrix).
2574 
2575    The user can specify preallocated storage for the diagonal part of
2576    the local submatrix with either d_nz or d_nnz (not both).  Set
2577    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2578    memory allocation.  Likewise, specify preallocated storage for the
2579    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2580 
2581    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2582    the figure below we depict these three local rows and all columns (0-11).
2583 
2584 .vb
2585            0 1 2 3 4 5 6 7 8 9 10 11
2586           --------------------------
2587    row 3  |. . . d d d o o o o  o  o
2588    row 4  |. . . d d d o o o o  o  o
2589    row 5  |. . . d d d o o o o  o  o
2590           --------------------------
2591 .ve
2592 
2593    Thus, any entries in the d locations are stored in the d (diagonal)
2594    submatrix, and any entries in the o locations are stored in the
2595    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2596    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2597 
2598    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2599    plus the diagonal part of the d matrix,
2600    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2601    In general, for PDE problems in which most nonzeros are near the diagonal,
2602    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2603    or you will get TERRIBLE performance; see the users' manual chapter on
2604    matrices.
2605 
2606    Level: intermediate
2607 
2608 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2609 @*/
2610 
2611 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2612 {
2613   PetscErrorCode ierr;
2614   PetscMPIInt    size;
2615 
2616   PetscFunctionBegin;
2617   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2618   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2619   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2620   if (size > 1) {
2621     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2622     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2623   } else {
2624     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2625     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 
/*
   MatDuplicate_MPISBAIJ - Duplicates an assembled MPISBAIJ matrix, copying values
   according to cpvalues. Copies the layout metadata, rebuilds the work vectors and
   scatters, and duplicates the diagonal (A) and off-diagonal (B) sequential blocks.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* share (reference-count) the layouts rather than copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-layout bookkeeping from the source matrix */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state is per-matrix; start fresh rather than copying */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration flags but reset the table and counters */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  /* deep-copy the global-to-local column map, if it has been built */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy the off-diagonal global column indices (length = block cols of oldmat->B) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views that alias the storage of slvec1
     (resp. slvec0): the first bs*mbs entries and the remaining tail, respectively */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* share the scatter with the original via an extra reference instead of copying it */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the local diagonal (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2737 
2738 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2739 {
2740   PetscErrorCode ierr;
2741   PetscInt       i,nz,j,rstart,rend;
2742   PetscScalar    *vals,*buf;
2743   MPI_Comm       comm;
2744   MPI_Status     status;
2745   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2746   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2747   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2748   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2749   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2750   PetscInt       dcount,kmax,k,nzcount,tmp;
2751   int            fd;
2752   PetscBool      isbinary;
2753 
2754   PetscFunctionBegin;
2755   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2756   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2757 
2758   /* force binary viewer to load .info file if it has not yet done so */
2759   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2760   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2761   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2762   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2763   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2764   if (bs < 0) bs = 1;
2765 
2766   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2767   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2768   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2769   if (!rank) {
2770     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2771     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2772     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2773   }
2774 
2775   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2776   M    = header[1];
2777   N    = header[2];
2778 
2779   /* If global sizes are set, check if they are consistent with that given in the file */
2780   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2781   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2782 
2783   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2784 
2785   /*
2786      This code adds extra rows to make sure the number of rows is
2787      divisible by the blocksize
2788   */
2789   Mbs        = M/bs;
2790   extra_rows = bs - M + bs*(Mbs);
2791   if (extra_rows == bs) extra_rows = 0;
2792   else                  Mbs++;
2793   if (extra_rows &&!rank) {
2794     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2795   }
2796 
2797   /* determine ownership of all rows */
2798   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2799     mbs = Mbs/size + ((Mbs % size) > rank);
2800     m   = mbs*bs;
2801   } else { /* User Set */
2802     m   = newmat->rmap->n;
2803     mbs = m/bs;
2804   }
2805   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2806   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2807   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2808   rowners[0] = 0;
2809   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2810   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2811   rstart = rowners[rank];
2812   rend   = rowners[rank+1];
2813 
2814   /* distribute row lengths to all processors */
2815   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2816   if (!rank) {
2817     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2818     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
2819     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2820     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2821     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2822     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2823     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2824   } else {
2825     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2826   }
2827 
2828   if (!rank) {   /* procs[0] */
2829     /* calculate the number of nonzeros on each processor */
2830     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
2831     for (i=0; i<size; i++) {
2832       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2833         procsnz[i] += rowlengths[j];
2834       }
2835     }
2836     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2837 
2838     /* determine max buffer needed and allocate it */
2839     maxnz = 0;
2840     for (i=0; i<size; i++) {
2841       maxnz = PetscMax(maxnz,procsnz[i]);
2842     }
2843     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2844 
2845     /* read in my part of the matrix column indices  */
2846     nz     = procsnz[0];
2847     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2848     mycols = ibuf;
2849     if (size == 1) nz -= extra_rows;
2850     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2851     if (size == 1) {
2852       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2853     }
2854 
2855     /* read in every ones (except the last) and ship off */
2856     for (i=1; i<size-1; i++) {
2857       nz   = procsnz[i];
2858       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2859       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2860     }
2861     /* read in the stuff for the last proc */
2862     if (size != 1) {
2863       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2864       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2865       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2866       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2867     }
2868     ierr = PetscFree(cols);CHKERRQ(ierr);
2869   } else {  /* procs[i], i>0 */
2870     /* determine buffer space needed for message */
2871     nz = 0;
2872     for (i=0; i<m; i++) nz += locrowlens[i];
2873     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2874     mycols = ibuf;
2875     /* receive message of column indices*/
2876     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2877     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2878     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2879   }
2880 
2881   /* loop over local rows, determining number of off diagonal entries */
2882   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2883   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2884   rowcount = 0;
2885   nzcount  = 0;
2886   for (i=0; i<mbs; i++) {
2887     dcount  = 0;
2888     odcount = 0;
2889     for (j=0; j<bs; j++) {
2890       kmax = locrowlens[rowcount];
2891       for (k=0; k<kmax; k++) {
2892         tmp = mycols[nzcount++]/bs; /* block col. index */
2893         if (!mask[tmp]) {
2894           mask[tmp] = 1;
2895           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2896           else masked1[dcount++] = tmp; /* entry in diag portion */
2897         }
2898       }
2899       rowcount++;
2900     }
2901 
2902     dlens[i]  = dcount;  /* d_nzz[i] */
2903     odlens[i] = odcount; /* o_nzz[i] */
2904 
2905     /* zero out the mask elements we set */
2906     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2907     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2908   }
2909   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2910   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2911   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2912 
2913   if (!rank) {
2914     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2915     /* read in my part of the matrix numerical values  */
2916     nz     = procsnz[0];
2917     vals   = buf;
2918     mycols = ibuf;
2919     if (size == 1) nz -= extra_rows;
2920     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2921     if (size == 1) {
2922       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2923     }
2924 
2925     /* insert into matrix */
2926     jj = rstart*bs;
2927     for (i=0; i<m; i++) {
2928       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2929       mycols += locrowlens[i];
2930       vals   += locrowlens[i];
2931       jj++;
2932     }
2933 
2934     /* read in other processors (except the last one) and ship out */
2935     for (i=1; i<size-1; i++) {
2936       nz   = procsnz[i];
2937       vals = buf;
2938       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2939       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2940     }
2941     /* the last proc */
2942     if (size != 1) {
2943       nz   = procsnz[i] - extra_rows;
2944       vals = buf;
2945       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2946       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2947       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2948     }
2949     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2950 
2951   } else {
2952     /* receive numeric values */
2953     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2954 
2955     /* receive message of values*/
2956     vals   = buf;
2957     mycols = ibuf;
2958     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2959     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2960     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2961 
2962     /* insert into matrix */
2963     jj = rstart*bs;
2964     for (i=0; i<m; i++) {
2965       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2966       mycols += locrowlens[i];
2967       vals   += locrowlens[i];
2968       jj++;
2969     }
2970   }
2971 
2972   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2973   ierr = PetscFree(buf);CHKERRQ(ierr);
2974   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2975   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2976   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2977   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2978   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2979   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2980   PetscFunctionReturn(0);
2981 }
2982 
2983 /*XXXXX@
2984    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2985 
2986    Input Parameters:
+  mat  - the matrix
-  fact - factor
2989 
2990    Not Collective on Mat, each process can have a different hash factor
2991 
2992    Level: advanced
2993 
2994   Notes:
2995    This can also be set by the command line option: -mat_use_hash_table fact
2996 
2997 .seealso: MatSetOption()
2998 @XXXXX*/
2999 
3000 
/*
  MatGetRowMaxAbs_MPISBAIJ - Computes v[i] = max_j |A(i,j)| over each locally owned
  (scalar) row of a symmetric MPISBAIJ matrix.

  Input Parameter:
.  A   - the MPISBAIJ matrix

  Output Parameters:
+  v   - vector of row-wise maximum absolute values
-  idx - must be NULL; returning the column index of the maximum is not implemented

  Only the upper triangle is stored, so an entry of the off-diagonal block B in local
  row `row` and global column `col` also contributes to row `col`, which is owned by a
  process of higher rank.  Each process accumulates those contributions in `work`
  (indexed by global scalar column) and ships the relevant slices to the owning
  processes with point-to-point messages; receivers fold them into their own `va`.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* seed va[] with the row maxima of the local diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;  /* number of local block rows */
  Mbs = a->Mbs;  /* number of global block rows */
  ba  = b->a;    /* raw CSR-of-blocks arrays of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;  /* scalar row-ownership ranges, indexed by rank */

  /* each proc creates an array to be distributed */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B */
  /* NOTE(review): rank size-1 is skipped, presumably because its B has no columns
     to the right of its diagonal block in upper-triangular storage -- confirm */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;  /* blocks in block row i of B */
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;  /* blocks are stored column-major */
            row  = brow + krow;   /* local row index */
            /* update max of my own row ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and of the symmetric partner row owned by a later process */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];  /* slice of work holding dest's rows */
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from every lower rank, folded in sequentially */
  if (rank) {
    rvalues = work;  /* reuse work as the receive buffer */
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3082 
/*
  MatSOR_MPISBAIJ - SOR/SSOR relaxation for MPISBAIJ matrices (block size 1 only).

  Supported flag combinations: SOR_APPLY_UPPER, SOR_LOCAL_SYMMETRIC_SWEEP,
  single-iteration zero-initial-guess SOR_LOCAL_FORWARD_SWEEP / SOR_LOCAL_BACKWARD_SWEEP,
  and SOR_EISENSTAT; anything else raises PETSC_ERR_SUP.

  Only the upper triangle is stored, so the lower-triangular off-process coupling is
  applied as B^T through the combined slvec0/slvec1 scatter (mat->sMvctx):
  slvec0 = [xx | -B^T*xx] is scattered with ADD_VALUES into slvec1 = [bb | 0],
  yielding the modified right-hand side for the purely local sweep on mat->A.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process correction when xx starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate both halves, so slvec0 = [-xx | -B^T*xx] (slvec0b was +B^T*xx above) */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* overlap the scatter with the array restores */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* work vectors are cached on the matrix across calls */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): MatMultDiagonalBlock writes bb1, but the next line and the
         rest of the branch operate on slvec1a, leaving bb1 unused -- verify intent */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1, so b + scale*diag*x simplifies to b - diag*x (one fewer flop/entry) */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3214 
/*
  MatSOR_MPISBAIJ_2comm - Variant of MatSOR_MPISBAIJ that uses two separate scatters
  through mat->Mvctx (a forward scatter for xx -> lvec and a reverse ADD scatter for
  lvec1 -> bb1) instead of the combined slvec0/slvec1 scatter.

  Only SOR_LOCAL_SYMMETRIC_SWEEP is supported, and only for block size 1.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the first sweep needs no off-process terms */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start gathering ghost values of xx; completed below, overlapped with B^T*xx */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter adds -B^T*xx contributions into the owners' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3258 
3259 /*@
3260      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3261          CSR format the local rows.
3262 
3263    Collective
3264 
3265    Input Parameters:
3266 +  comm - MPI communicator
3267 .  bs - the block size, only a block size of 1 is supported
3268 .  m - number of local rows (Cannot be PETSC_DECIDE)
3269 .  n - This value should be the same as the local size used in creating the
3270        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3271        calculated if N is given) For square matrices n is almost always m.
3272 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3273 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
3275 .   j - column indices
3276 -   a - matrix values
3277 
3278    Output Parameter:
3279 .   mat - the matrix
3280 
3281    Level: intermediate
3282 
3283    Notes:
3284        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3285      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3286      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3287 
3288        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3289 
3290 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3291           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3292 @*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;


  PetscFunctionBegin;
  /* i[] must be a valid CSR offset array (0-based), so it necessarily starts at 0 */
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  /* create an empty MPISBAIJ matrix, then copy the CSR arrays into it; the order
     Create -> SetSizes -> SetType -> SetPreallocationCSR is mandatory */
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3307 
3308 
3309 /*@C
3310    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
3311 
3312    Collective
3313 
3314    Input Parameters:
3315 +  B - the matrix
3316 .  bs - the block size
3317 .  i - the indices into j for the start of each local row (starts with zero)
3318 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3319 -  v - optional values in the matrix
3320 
3321    Level: advanced
3322 
3323    Notes:
3324    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
3325    and usually the numerical values as well
3326 
3327    Any entries below the diagonal are ignored
3328 
3329 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3330 @*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation registered under this name;
     PetscTryMethod is a no-op if B's type does not provide one */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3339 
3340 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3341 {
3342   PetscErrorCode ierr;
3343   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3344   PetscInt       *indx;
3345   PetscScalar    *values;
3346 
3347   PetscFunctionBegin;
3348   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3349   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3350     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3351     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3352     PetscInt       *bindx,rmax=a->rmax,j;
3353     PetscMPIInt    rank,size;
3354 
3355     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3356     mbs = m/bs; Nbs = N/cbs;
3357     if (n == PETSC_DECIDE) {
3358       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3359     }
3360     nbs = n/cbs;
3361 
3362     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3363     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3364 
3365     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3366     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3367     if (rank == size-1) {
3368       /* Check sum(nbs) = Nbs */
3369       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3370     }
3371 
3372     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3373     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3374     for (i=0; i<mbs; i++) {
3375       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3376       nnz  = nnz/bs;
3377       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3378       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3379       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3380     }
3381     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3382     ierr = PetscFree(bindx);CHKERRQ(ierr);
3383 
3384     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3385     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3386     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3387     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3388     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3389     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3390     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3391   }
3392 
3393   /* numeric phase */
3394   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3395   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3396 
3397   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3398   for (i=0; i<m; i++) {
3399     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3400     Ii   = i + rstart;
3401     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3402     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3403   }
3404   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3405   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3406   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
3409