xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 3d6a3516742e32e485e84f6dcdca1b62f17153b7)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
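
/*
   Usage sketch (illustrative, not part of the original source): the helper above makes a
   dry run of MatSetValues() through a MATPREALLOCATOR matrix to count the nonzeros B
   will need, then preallocates B via MatPreallocatorPreallocate(). A caller that wants B
   to inherit the pattern of an assembled matrix X (plus its transposed pattern) might
   use it as

     Mat       X1[1]   = {X};            // X is an assumed, already assembled matrix
     PetscBool symm[1] = {PETSC_TRUE};   // also insert the transposed pattern
     ierr = MatPreallocateWithMats_Private(B,1,X1,symm,PETSC_TRUE);CHKERRQ(ierr);
     // with fill == PETSC_TRUE the pattern is also inserted into B (as zeros),
     // so later MatSetValues() calls on that pattern trigger no mallocs
*/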
58 
59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
78     ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
79     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
80   } else B = *newmat;
81 
82   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84     PetscInt          ncols;
85     const PetscInt    *row;
86     const PetscScalar *vals;
87 
88     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
89     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
90     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
91     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
92   }
93   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
94   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
95   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96 
97   if (reuse == MAT_INPLACE_MATRIX) {
98     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
99   } else {
100     *newmat = B;
101   }
102   PetscFunctionReturn(0);
103 }
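
/*
   Caller-side sketch (illustrative): the conversion above is normally reached through
   the public MatConvert() interface, e.g. for an assembled MPISBAIJ matrix A

     Mat Aij;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aij);CHKERRQ(ierr);
     // Aij stores both triangles explicitly; A itself is unchanged
     // with MAT_INPLACE_MATRIX one would pass &A instead and A is replaced in place
*/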
104 
105 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
106 {
107   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
112   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
117 {
118   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
123   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
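
/*
   Sketch of the store/retrieve pattern the two routines above implement (illustrative):
   once the nonzero structure is frozen, the numerical values can be snapshotted and
   restored, e.g.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      // snapshot the current values
     // ... overwrite values, e.g. shift the diagonal after a failed factorization ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   // restore the snapshot
*/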
126 
127 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
128   { \
129  \
130     brow = row/bs;  \
131     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
132     rmax = aimax[brow]; nrow = ailen[brow]; \
133     bcol = col/bs; \
134     ridx = row % bs; cidx = col % bs; \
135     low  = 0; high = nrow; \
136     while (high-low > 3) { \
137       t = (low+high)/2; \
138       if (rp[t] > bcol) high = t; \
139       else              low  = t; \
140     } \
141     for (_i=low; _i<high; _i++) { \
142       if (rp[_i] > bcol) break; \
143       if (rp[_i] == bcol) { \
144         bap = ap + bs2*_i + bs*cidx + ridx; \
145         if (addv == ADD_VALUES) *bap += value;  \
146         else                    *bap  = value;  \
147         goto a_noinsert; \
148       } \
149     } \
150     if (a->nonew == 1) goto a_noinsert; \
151     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
152     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
153     N = nrow++ - 1;  \
154     /* shift up all the later entries in this row */ \
155     for (ii=N; ii>=_i; ii--) { \
156       rp[ii+1] = rp[ii]; \
157       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
158     } \
159     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
160     rp[_i]                      = bcol;  \
161     ap[bs2*_i + bs*cidx + ridx] = value;  \
162     A->nonzerostate++;\
163 a_noinsert:; \
164     ailen[brow] = nrow; \
165   }
166 
167 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
168   { \
169     brow = row/bs;  \
170     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171     rmax = bimax[brow]; nrow = bilen[brow]; \
172     bcol = col/bs; \
173     ridx = row % bs; cidx = col % bs; \
174     low  = 0; high = nrow; \
175     while (high-low > 3) { \
176       t = (low+high)/2; \
177       if (rp[t] > bcol) high = t; \
178       else              low  = t; \
179     } \
180     for (_i=low; _i<high; _i++) { \
181       if (rp[_i] > bcol) break; \
182       if (rp[_i] == bcol) { \
183         bap = ap + bs2*_i + bs*cidx + ridx; \
184         if (addv == ADD_VALUES) *bap += value;  \
185         else                    *bap  = value;  \
186         goto b_noinsert; \
187       } \
188     } \
189     if (b->nonew == 1) goto b_noinsert; \
190     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
191     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
192     N = nrow++ - 1;  \
193     /* shift up all the later entries in this row */ \
194     for (ii=N; ii>=_i; ii--) { \
195       rp[ii+1] = rp[ii]; \
196       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
197     } \
198     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
199     rp[_i]                      = bcol;  \
200     ap[bs2*_i + bs*cidx + ridx] = value;  \
201     B->nonzerostate++;\
202 b_noinsert:; \
203     bilen[brow] = nrow; \
204   }
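
/*
   Both macros above locate the target block column with a truncated binary search: the
   while loop halves [low,high) until at most three candidates remain, and a short
   linear scan finishes the job. A condensed sketch of the same search (illustrative):

     low = 0; high = nrow;
     while (high - low > 3) {            // binary phase on the sorted block columns
       t = (low + high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (_i = low; _i < high; _i++) {   // linear phase
       if (rp[_i] >= bcol) break;        // match, or the insertion point
     }

   If the block column is absent and insertion is allowed (nonew == 0), the tail of the
   macro shifts the later entries of the row up one slot and zeroes the freed bs*bs
   block before writing the new value.
*/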
205 
206 /* Only add/insert a(i,j) with i<=j (blocks).
207    Any a(i,j) with i>j input by the user is ignored.
208 */
209 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscBool      roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap->rstart;
217   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
218   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
219 
220   /* Some Variables required in the macro */
221   Mat          A     = baij->A;
222   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar    *aa   =a->a;
225 
226   Mat         B     = baij->B;
227   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar   *ba   =b->a;
230 
231   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar *ap,*bap;
234 
235   /* for stash */
236   PetscInt  n_loc, *in_loc = NULL;
237   MatScalar *v_loc = NULL;
238 
239   PetscFunctionBegin;
240   if (!baij->donotstash) {
241     if (n > baij->n_loc) {
242       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
243       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
244       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
246 
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs) {
262           if (a->ignore_ltriangular) {
263             continue;    /* ignore lower triangular blocks */
264           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
265         }
266         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
267           col  = in[j] - cstart_orig;         /* local col index */
268           brow = row/bs; bcol = col/bs;
269           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
270           if (roworiented) value = v[i*n+j];
271           else             value = v[i+j*m];
272           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
273           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
274         } else if (in[j] < 0) continue;
275 #if defined(PETSC_USE_DEBUG)
276         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
277 #endif
278         else {  /* off-diag entry (B) */
279           if (mat->was_assembled) {
280             if (!baij->colmap) {
281               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
282             }
283 #if defined(PETSC_USE_CTABLE)
284             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
285             col  = col - 1;
286 #else
287             col = baij->colmap[in[j]/bs] - 1;
288 #endif
289             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
290               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
291               col  =  in[j];
292             /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
293               B    = baij->B;
294               b    = (Mat_SeqBAIJ*)(B)->data;
295               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
296               ba   = b->a;
297             } else col += in[j]%bs;
298           } else col = in[j];
299           if (roworiented) value = v[i*n+j];
300           else             value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
307       if (!baij->donotstash) {
308         mat->assembled = PETSC_FALSE;
309         n_loc          = 0;
310         for (j=0; j<n; j++) {
311           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
312           in_loc[n_loc] = in[j];
313           if (roworiented) {
314             v_loc[n_loc] = v[i*n+j];
315           } else {
316             v_loc[n_loc] = v[j*m+i];
317           }
318           n_loc++;
319         }
320         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
321       }
322     }
323   }
324   PetscFunctionReturn(0);
325 }
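
/*
   Caller-side sketch (illustrative): only the upper triangle should be passed to an
   SBAIJ matrix. Assuming rstart,rend from MatGetOwnershipRange() and global size N, a
   tridiagonal assembly that silently drops lower-triangular input would look like

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       PetscInt    cols[2] = {i, i+1};          // diagonal and first superdiagonal
       PetscScalar vals[2] = {2.0, -1.0};
       PetscInt    nc      = (i < N-1) ? 2 : 1; // last row has no superdiagonal
       ierr = MatSetValues(mat,1,&i,nc,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/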
326 
327 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
328 {
329   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
330   PetscErrorCode    ierr;
331   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
332   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
333   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
334   PetscBool         roworiented=a->roworiented;
335   const PetscScalar *value     = v;
336   MatScalar         *ap,*aa = a->a,*bap;
337 
338   PetscFunctionBegin;
339   if (col < row) {
340     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
341     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
342   }
343   rp   = aj + ai[row];
344   ap   = aa + bs2*ai[row];
345   rmax = imax[row];
346   nrow = ailen[row];
347   value = v;
348   low   = 0;
349   high  = nrow;
350 
351   while (high-low > 7) {
352     t = (low+high)/2;
353     if (rp[t] > col) high = t;
354     else             low  = t;
355   }
356   for (i=low; i<high; i++) {
357     if (rp[i] > col) break;
358     if (rp[i] == col) {
359       bap = ap +  bs2*i;
360       if (roworiented) {
361         if (is == ADD_VALUES) {
362           for (ii=0; ii<bs; ii++) {
363             for (jj=ii; jj<bs2; jj+=bs) {
364               bap[jj] += *value++;
365             }
366           }
367         } else {
368           for (ii=0; ii<bs; ii++) {
369             for (jj=ii; jj<bs2; jj+=bs) {
370               bap[jj] = *value++;
371             }
372           }
373         }
374       } else {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++) {
377             for (jj=0; jj<bs; jj++) {
378               *bap++ += *value++;
379             }
380           }
381         } else {
382           for (ii=0; ii<bs; ii++) {
383             for (jj=0; jj<bs; jj++) {
384               *bap++  = *value++;
385             }
386           }
387         }
388       }
389       goto noinsert2;
390     }
391   }
392   if (nonew == 1) goto noinsert2;
393   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at block row/column (%D, %D) in the matrix", orow, ocol);
394   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
395   N = nrow++ - 1; high++;
396   /* shift up all the later entries in this row */
397   for (ii=N; ii>=i; ii--) {
398     rp[ii+1] = rp[ii];
399     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
400   }
401   if (N >= i) {
402     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
403   }
404   rp[i] = col;
405   bap   = ap +  bs2*i;
406   if (roworiented) {
407     for (ii=0; ii<bs; ii++) {
408       for (jj=ii; jj<bs2; jj+=bs) {
409         bap[jj] = *value++;
410       }
411     }
412   } else {
413     for (ii=0; ii<bs; ii++) {
414       for (jj=0; jj<bs; jj++) {
415         *bap++ = *value++;
416       }
417     }
418   }
419   noinsert2:;
420   ailen[row] = nrow;
421   PetscFunctionReturn(0);
422 }
423 
424 /*
425    This routine is exactly duplicated in mpibaij.c
426 */
427 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
428 {
429   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
430   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
431   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
432   PetscErrorCode    ierr;
433   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
434   PetscBool         roworiented=a->roworiented;
435   const PetscScalar *value     = v;
436   MatScalar         *ap,*aa = a->a,*bap;
437 
438   PetscFunctionBegin;
439   rp   = aj + ai[row];
440   ap   = aa + bs2*ai[row];
441   rmax = imax[row];
442   nrow = ailen[row];
443   low  = 0;
444   high = nrow;
445   value = v;
446   while (high-low > 7) {
447     t = (low+high)/2;
448     if (rp[t] > col) high = t;
449     else             low  = t;
450   }
451   for (i=low; i<high; i++) {
452     if (rp[i] > col) break;
453     if (rp[i] == col) {
454       bap = ap +  bs2*i;
455       if (roworiented) {
456         if (is == ADD_VALUES) {
457           for (ii=0; ii<bs; ii++) {
458             for (jj=ii; jj<bs2; jj+=bs) {
459               bap[jj] += *value++;
460             }
461           }
462         } else {
463           for (ii=0; ii<bs; ii++) {
464             for (jj=ii; jj<bs2; jj+=bs) {
465               bap[jj] = *value++;
466             }
467           }
468         }
469       } else {
470         if (is == ADD_VALUES) {
471           for (ii=0; ii<bs; ii++,value+=bs) {
472             for (jj=0; jj<bs; jj++) {
473               bap[jj] += value[jj];
474             }
475             bap += bs;
476           }
477         } else {
478           for (ii=0; ii<bs; ii++,value+=bs) {
479             for (jj=0; jj<bs; jj++) {
480               bap[jj]  = value[jj];
481             }
482             bap += bs;
483           }
484         }
485       }
486       goto noinsert2;
487     }
488   }
489   if (nonew == 1) goto noinsert2;
490   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
491   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
492   N = nrow++ - 1; high++;
493   /* shift up all the later entries in this row */
494   for (ii=N; ii>=i; ii--) {
495     rp[ii+1] = rp[ii];
496     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
497   }
498   if (N >= i) {
499     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
500   }
501   rp[i] = col;
502   bap   = ap +  bs2*i;
503   if (roworiented) {
504     for (ii=0; ii<bs; ii++) {
505       for (jj=ii; jj<bs2; jj+=bs) {
506         bap[jj] = *value++;
507       }
508     }
509   } else {
510     for (ii=0; ii<bs; ii++) {
511       for (jj=0; jj<bs; jj++) {
512         *bap++ = *value++;
513       }
514     }
515   }
516   noinsert2:;
517   ailen[row] = nrow;
518   PetscFunctionReturn(0);
519 }
520 
521 /*
522     This routine could be optimized by removing the need for the block copy below and passing stride information
523   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
524 */
525 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
526 {
527   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
528   const MatScalar *value;
529   MatScalar       *barray     =baij->barray;
530   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
531   PetscErrorCode  ierr;
532   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
533   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
534   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
535 
536   PetscFunctionBegin;
537   if (!barray) {
538     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
539     baij->barray = barray;
540   }
541 
542   if (roworiented) {
543     stepval = (n-1)*bs;
544   } else {
545     stepval = (m-1)*bs;
546   }
547   for (i=0; i<m; i++) {
548     if (im[i] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
551 #endif
552     if (im[i] >= rstart && im[i] < rend) {
553       row = im[i] - rstart;
554       for (j=0; j<n; j++) {
555         if (im[i] > in[j]) {
556           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
557           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
558         }
559         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block can be used in place; no copy is required */
560         if ((roworiented) && (n == 1)) {
561           barray = (MatScalar*) v + i*bs2;
562         } else if ((!roworiented) && (m == 1)) {
563           barray = (MatScalar*) v + j*bs2;
564         } else { /* Here a copy is required */
565           if (roworiented) {
566             value = v + i*(stepval+bs)*bs + j*bs;
567           } else {
568             value = v + j*(stepval+bs)*bs + i*bs;
569           }
570           for (ii=0; ii<bs; ii++,value+=stepval) {
571             for (jj=0; jj<bs; jj++) {
572               *barray++ = *value++;
573             }
574           }
575           barray -=bs2;
576         }
577 
578         if (in[j] >= cstart && in[j] < cend) {
579           col  = in[j] - cstart;
580           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
581         } else if (in[j] < 0) continue;
582 #if defined(PETSC_USE_DEBUG)
583         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
584 #endif
585         else {
586           if (mat->was_assembled) {
587             if (!baij->colmap) {
588               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
589             }
590 
591 #if defined(PETSC_USE_DEBUG)
592 #if defined(PETSC_USE_CTABLE)
593             { PetscInt data;
594               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
595               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
596             }
597 #else
598             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
599 #endif
600 #endif
601 #if defined(PETSC_USE_CTABLE)
602             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
603             col  = (col - 1)/bs;
604 #else
605             col = (baij->colmap[in[j]] - 1)/bs;
606 #endif
607             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
608               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
609               col  = in[j];
610             }
611           } else col = in[j];
612           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
613         }
614       }
615     } else {
616       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
617       if (!baij->donotstash) {
618         if (roworiented) {
619           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
620         } else {
621           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
622         }
623       }
624     }
625   }
626   PetscFunctionReturn(0);
627 }
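
/*
   Caller-side sketch (illustrative): MatSetValuesBlocked() takes block indices and a
   dense bs x bs block of values. For bs = 2, inserting the diagonal block of an owned
   block row ib (row-major values, matching the default row-oriented layout):

     PetscScalar blk[4] = { 4.0, -1.0,
                           -1.0,  4.0};
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&ib,blk,INSERT_VALUES);CHKERRQ(ierr);

   As in the pointwise path above, block rows below the block diagonal are rejected, or
   silently dropped when MAT_IGNORE_LOWER_TRIANGULAR is set.
*/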
628 
629 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
630 {
631   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
634   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
635 
636   PetscFunctionBegin;
637   for (i=0; i<m; i++) {
638     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
639     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
640     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
641       row = idxm[i] - bsrstart;
642       for (j=0; j<n; j++) {
643         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
644         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
645         if (idxn[j] >= bscstart && idxn[j] < bscend) {
646           col  = idxn[j] - bscstart;
647           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
648         } else {
649           if (!baij->colmap) {
650             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
651           }
652 #if defined(PETSC_USE_CTABLE)
653           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
654           data--;
655 #else
656           data = baij->colmap[idxn[j]/bs]-1;
657 #endif
658           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
659           else {
660             col  = data + idxn[j]%bs;
661             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
662           }
663         }
664       }
665     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
666   }
667   PetscFunctionReturn(0);
668 }
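
/*
   Caller-side sketch (illustrative): as the SETERRQ above enforces, MatGetValues() on
   an MPISBAIJ matrix can only query locally owned rows; entries whose column block is
   not stored locally are returned as 0.0.

     PetscInt    row = rstart, col = rstart;   // rstart from MatGetOwnershipRange()
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/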
669 
670 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
671 {
672   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
673   PetscErrorCode ierr;
674   PetscReal      sum[2],*lnorm2;
675 
676   PetscFunctionBegin;
677   if (baij->size == 1) {
678     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
679   } else {
680     if (type == NORM_FROBENIUS) {
681       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
682       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
683       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
684       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
685       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
686       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
687       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
688       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
690       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
691       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
692       PetscReal    *rsum,*rsum2,vabs;
693       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
694       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
695       MatScalar    *v;
696 
697       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
698       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
699       /* Amat */
700       v = amat->a; jj = amat->j;
701       for (brow=0; brow<mbs; brow++) {
702         grow = bs*(rstart + brow);
703         nz   = amat->i[brow+1] - amat->i[brow];
704         for (bcol=0; bcol<nz; bcol++) {
705           gcol = bs*(rstart + *jj); jj++;
706           for (col=0; col<bs; col++) {
707             for (row=0; row<bs; row++) {
708               vabs            = PetscAbsScalar(*v); v++;
709               rsum[gcol+col] += vabs;
710               /* non-diagonal block */
711               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
712             }
713           }
714         }
715         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
716       }
717       /* Bmat */
718       v = bmat->a; jj = bmat->j;
719       for (brow=0; brow<mbs; brow++) {
720         grow = bs*(rstart + brow);
721         nz = bmat->i[brow+1] - bmat->i[brow];
722         for (bcol=0; bcol<nz; bcol++) {
723           gcol = bs*garray[*jj]; jj++;
724           for (col=0; col<bs; col++) {
725             for (row=0; row<bs; row++) {
726               vabs            = PetscAbsScalar(*v); v++;
727               rsum[gcol+col] += vabs;
728               rsum[grow+row] += vabs;
729             }
730           }
731         }
732         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
733       }
734       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
735       *norm = 0.0;
736       for (col=0; col<mat->cmap->N; col++) {
737         if (rsum2[col] > *norm) *norm = rsum2[col];
738       }
739       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
740     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
741   }
742   PetscFunctionReturn(0);
743 }
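
/*
   Note on the Frobenius branch above: each process stores the diagonal block A and only
   the off-diagonal part B of the symmetric matrix, so the missing transposed part is
   accounted for by weighting B twice:

     ||M||_F = sqrt( sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ) )

   which is exactly PetscSqrtReal(sum[0] + 2*sum[1]) after the MPIU_Allreduce(). The 1-
   and infinity-norms coincide for a symmetric matrix, which is why both are computed
   with the same row/column-sum loop.
*/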
744 
745 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749   PetscInt       nstash,reallocs;
750 
751   PetscFunctionBegin;
752   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
753 
754   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
755   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
756   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
757   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
758   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
759   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
764 {
765   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
766   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
767   PetscErrorCode ierr;
768   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
769   PetscInt       *row,*col;
770   PetscBool      other_disassembled;
771   PetscMPIInt    n;
772   PetscBool      r1,r2,r3;
773   MatScalar      *val;
774 
775   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
776   PetscFunctionBegin;
777   if (!baij->donotstash &&  !mat->nooffprocentries) {
778     while (1) {
779       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
780       if (!flg) break;
781 
782       for (i=0; i<n;) {
783         /* Now identify the consecutive vals belonging to the same row */
784         for (j=i,rstart=row[j]; j<n; j++) {
785           if (row[j] != rstart) break;
786         }
787         if (j < n) ncols = j-i;
788         else       ncols = n-i;
789         /* Now assemble all these values with a single function call */
790         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
791         i    = j;
792       }
793     }
794     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
795     /* Now process the block-stash. Since the values are stashed column-oriented,
796        temporarily clear the roworiented flags, then restore the original flags
797        after MatSetValuesBlocked() */
798     r1 = baij->roworiented;
799     r2 = a->roworiented;
800     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
801 
802     baij->roworiented = PETSC_FALSE;
803     a->roworiented    = PETSC_FALSE;
804 
805     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
806     while (1) {
807       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
808       if (!flg) break;
809 
810       for (i=0; i<n;) {
811         /* Now identify the consecutive vals belonging to the same row */
812         for (j=i,rstart=row[j]; j<n; j++) {
813           if (row[j] != rstart) break;
814         }
815         if (j < n) ncols = j-i;
816         else       ncols = n-i;
817         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
818         i    = j;
819       }
820     }
821     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
822 
823     baij->roworiented = r1;
824     a->roworiented    = r2;
825 
826     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
827   }
828 
829   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
830   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
831 
832   /* determine if any processor has disassembled, if so we must
833      also disassemble ourselves, in order that we may reassemble. */
834   /*
835      if nonzero structure of submatrix B cannot change then we know that
836      no processor disassembled thus we can skip this stuff
837   */
838   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
839     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
840     if (mat->was_assembled && !other_disassembled) {
841       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
842     }
843   }
844 
845   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
846     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
847   }
848   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
849   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
850 
851   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
852 
853   baij->rowvalues = 0;
854 
855   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
856   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
857     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
858     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
859   }
860   PetscFunctionReturn(0);
861 }
862 
863 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
864 #include <petscdraw.h>
865 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
866 {
867   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
868   PetscErrorCode    ierr;
869   PetscInt          bs   = mat->rmap->bs;
870   PetscMPIInt       rank = baij->rank;
871   PetscBool         iascii,isdraw;
872   PetscViewer       sviewer;
873   PetscViewerFormat format;
874 
875   PetscFunctionBegin;
876   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
877   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
878   if (iascii) {
879     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
880     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
881       MatInfo info;
882       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
883       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
884       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
885       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
886       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
887       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
888       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
889       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
890       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
891       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
892       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
893       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
894       PetscFunctionReturn(0);
895     } else if (format == PETSC_VIEWER_ASCII_INFO) {
896       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
897       PetscFunctionReturn(0);
898     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899       PetscFunctionReturn(0);
900     }
901   }
902 
903   if (isdraw) {
904     PetscDraw draw;
905     PetscBool isnull;
906     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
907     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
908     if (isnull) PetscFunctionReturn(0);
909   }
910 
911   {
912     /* assemble the entire matrix onto first processor. */
913     Mat          A;
914     Mat_SeqSBAIJ *Aloc;
915     Mat_SeqBAIJ  *Bloc;
916     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
917     MatScalar    *a;
918     const char   *matname;
919 
920     /* Should this be the same type as mat? */
921     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
922     if (!rank) {
923       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
924     } else {
925       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
926     }
927     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
928     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
929     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
930     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
931 
932     /* copy over the A part */
933     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
934     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
935     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
936 
937     for (i=0; i<mbs; i++) {
938       rvals[0] = bs*(baij->rstartbs + i);
939       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
940       for (j=ai[i]; j<ai[i+1]; j++) {
941         col = (baij->cstartbs+aj[j])*bs;
942         for (k=0; k<bs; k++) {
943           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
944           col++;
945           a += bs;
946         }
947       }
948     }
949     /* copy over the B part */
950     Bloc = (Mat_SeqBAIJ*)baij->B->data;
951     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
952     for (i=0; i<mbs; i++) {
953 
954       rvals[0] = bs*(baij->rstartbs + i);
955       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
956       for (j=ai[i]; j<ai[i+1]; j++) {
957         col = baij->garray[aj[j]]*bs;
958         for (k=0; k<bs; k++) {
959           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
960           col++;
961           a += bs;
962         }
963       }
964     }
965     ierr = PetscFree(rvals);CHKERRQ(ierr);
966     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
967     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
968     /*
969        Everyone has to call to draw the matrix since the graphics waits are
970        synchronized across all processors that share the PetscDraw object
971     */
972     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
973     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
974     if (!rank) {
975       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
976       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
977     }
978     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
979     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980     ierr = MatDestroy(&A);CHKERRQ(ierr);
981   }
982   PetscFunctionReturn(0);
983 }
984 
985 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
986 {
987   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
988   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
989   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
990   PetscErrorCode ierr;
991   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
992   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
993   int            fd;
994   PetscScalar    *column_values;
995   FILE           *file;
996   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
997   PetscInt       message_count,flowcontrolcount;
998 
999   PetscFunctionBegin;
1000   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1001   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1002   nz   = bs2*(A->nz + B->nz);
1003   rlen = mat->rmap->n;
1004   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1005   if (!rank) {
1006     header[0] = MAT_FILE_CLASSID;
1007     header[1] = mat->rmap->N;
1008     header[2] = mat->cmap->N;
1009 
1010     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1012     /* get largest number of rows any processor has */
1013     range = mat->rmap->range;
1014     for (i=1; i<size; i++) {
1015       rlen = PetscMax(rlen,range[i+1] - range[i]);
1016     }
1017   } else {
1018     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1019   }
1020 
1021   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1022   /* compute lengths of each row  */
1023   for (i=0; i<a->mbs; i++) {
1024     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1025   }
1026   /* store the row lengths to the file */
1027   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1028   if (!rank) {
1029     MPI_Status status;
1030     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1031     rlen = (range[1] - range[0])/bs;
1032     for (i=0; i<rlen; i++) {
1033       for (j=0; j<bs; j++) {
1034         row_lens[i*bs+j] = bs*crow_lens[i];
1035       }
1036     }
1037     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1038     for (i=1; i<size; i++) {
1039       rlen = (range[i+1] - range[i])/bs;
1040       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1041       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1042       for (k=0; k<rlen; k++) {
1043         for (j=0; j<bs; j++) {
1044           row_lens[k*bs+j] = bs*crow_lens[k];
1045         }
1046       }
1047       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1050     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1051   } else {
1052     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1053     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1054     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1055   }
1056   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1057 
1058   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not
1059      have the information needed to expand a block row into its individual rows. This requires more communication, but
1060      still no more than the communication needed for the nonzero values  */
1061   nzmax = nz; /*  space the largest processor needs */
1062   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1063   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1064   cnt   = 0;
1065   for (i=0; i<a->mbs; i++) {
1066     pcnt = cnt;
1067     for (j=B->i[i]; j<B->i[i+1]; j++) {
1068       if ((col = garray[B->j[j]]) > cstart) break;
1069       for (l=0; l<bs; l++) {
1070         column_indices[cnt++] = bs*col+l;
1071       }
1072     }
1073     for (k=A->i[i]; k<A->i[i+1]; k++) {
1074       for (l=0; l<bs; l++) {
1075         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1076       }
1077     }
1078     for (; j<B->i[i+1]; j++) {
1079       for (l=0; l<bs; l++) {
1080         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1081       }
1082     }
1083     len = cnt - pcnt;
1084     for (k=1; k<bs; k++) {
1085       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1086       cnt += len;
1087     }
1088   }
1089   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1090 
1091   /* store the columns to the file */
1092   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093   if (!rank) {
1094     MPI_Status status;
1095     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1096     for (i=1; i<size; i++) {
1097       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1098       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1099       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1100       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1101     }
1102     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1105     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1106     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1107     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1108   }
1109   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1110 
1111   /* load up the numerical values */
1112   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1113   cnt  = 0;
1114   for (i=0; i<a->mbs; i++) {
1115     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1116     for (j=B->i[i]; j<B->i[i+1]; j++) {
1117       if (garray[B->j[j]] > cstart) break;
1118       for (l=0; l<bs; l++) {
1119         for (ll=0; ll<bs; ll++) {
1120           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1121         }
1122       }
1123       cnt += bs;
1124     }
1125     for (k=A->i[i]; k<A->i[i+1]; k++) {
1126       for (l=0; l<bs; l++) {
1127         for (ll=0; ll<bs; ll++) {
1128           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1129         }
1130       }
1131       cnt += bs;
1132     }
1133     for (; j<B->i[i+1]; j++) {
1134       for (l=0; l<bs; l++) {
1135         for (ll=0; ll<bs; ll++) {
1136           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1137         }
1138       }
1139       cnt += bs;
1140     }
1141     cnt += (bs-1)*rlen;
1142   }
1143   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1144 
1145   /* store the column values to the file */
1146   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1147   if (!rank) {
1148     MPI_Status status;
1149     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1150     for (i=1; i<size; i++) {
1151       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1152       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1153       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1154       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1155     }
1156     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1159     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1162   }
1163   ierr = PetscFree(column_values);CHKERRQ(ierr);
1164 
1165   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1166   if (file) {
1167     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1173 {
1174   PetscErrorCode ierr;
1175   PetscBool      iascii,isdraw,issocket,isbinary;
1176 
1177   PetscFunctionBegin;
1178   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1179   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1180   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1181   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1182   if (iascii || isdraw || issocket) {
1183     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1184   } else if (isbinary) {
1185     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
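
/*
   Caller-side sketch (illustrative): the dispatch above is reached through the public
   MatView() interface, e.g. to write the binary format produced by
   MatView_MPISBAIJ_Binary():

     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"mat.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(mat,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/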
1189 
1190 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1191 {
1192   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1198 #endif
1199   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1200   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1201   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1202   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1203 #if defined(PETSC_USE_CTABLE)
1204   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1205 #else
1206   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1207 #endif
1208   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1209   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1210   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1211   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1212   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1213   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1214   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1215   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1216   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1217   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1218   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1219   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1220   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1221   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1222   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1223 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1224   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1227   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1228   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1229   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235 #if defined(PETSC_HAVE_ELEMENTAL)
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1237 #endif
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1244 {
1245   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1246   PetscErrorCode    ierr;
1247   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1248   PetscScalar       *from;
1249   const PetscScalar *x;
1250 
1251   PetscFunctionBegin;
1252   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1253   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1254 
1255   /* diagonal part */
1256   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1257   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1258 
1259   /* subdiagonal part */
1260   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1261 
1262   /* copy x into the vec slvec0 */
1263   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1264   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1265 
1266   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1267   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1268   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1269 
1270   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1271   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1272   /* superdiagonal part */
1273   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1278 {
1279   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1280   PetscErrorCode    ierr;
1281   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1282   PetscScalar       *from;
1283   const PetscScalar *x;
1284 
1285   PetscFunctionBegin;
1286   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1287   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1288 
1289   /* diagonal part */
1290   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1291   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1292 
1293   /* subdiagonal part */
1294   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1295 
1296   /* copy x into the vec slvec0 */
1297   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1298   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1299 
1300   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1301   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303 
1304   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   /* superdiagonal part */
1307   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       nt;
1316 
1317   PetscFunctionBegin;
1318   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1319   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1320 
1321   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1322   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1323 
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   /* do diagonal part */
1326   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1327   /* do superdiagonal part */
1328   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1329   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1330   /* do subdiagonal part */
1331   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1332   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1338 {
1339   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1340   PetscErrorCode    ierr;
1341   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1342   PetscScalar       *from,zero=0.0;
1343   const PetscScalar *x;
1344 
1345   PetscFunctionBegin;
1346   /*
1347   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1348   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1349   */
1350   /* diagonal part */
1351   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1352   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1353 
1354   /* subdiagonal part */
1355   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1356 
1357   /* copy x into the vec slvec0 */
1358   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1359   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1360   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1361   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1362 
1363   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1364   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1365   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1366 
1367   /* superdiagonal part */
1368   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1379   /* do diagonal part */
1380   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1381   /* do superdiagonal part */
1382   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1383   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1384 
1385   /* do subdiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394   diagonal block
1395 */
1396 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1397 {
1398   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1403   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1414   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1419 {
1420   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1421   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1422   PetscErrorCode ierr;
1423   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1424   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1425   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1426 
1427   PetscFunctionBegin;
1428   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1429   mat->getrowactive = PETSC_TRUE;
1430 
1431   if (!mat->rowvalues && (idx || v)) {
1432     /*
1433         allocate enough space to hold information from the longest row.
1434     */
1435     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1436     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1437     PetscInt     max = 1,mbs = mat->mbs,tmp;
1438     for (i=0; i<mbs; i++) {
1439       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1440       if (max < tmp) max = tmp;
1441     }
1442     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1443   }
1444 
1445   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1446   lrow = row - brstart;  /* local row index */
1447 
1448   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1449   if (!v)   {pvA = 0; pvB = 0;}
1450   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1451   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1452   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1453   nztot = nzA + nzB;
1454 
1455   cmap = mat->garray;
1456   if (v  || idx) {
1457     if (nztot) {
1458       /* Sort by increasing column numbers, assuming A and B already sorted */
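           /* Columns of B (the off-diagonal block) split around the diagonal block:
              the merged row is laid out as [B columns left of the diagonal block |
              all A columns | remaining B columns]; imark records the split point. */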
1459       PetscInt imark = -1;
1460       if (v) {
1461         *v = v_p = mat->rowvalues;
1462         for (i=0; i<nzB; i++) {
1463           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1464           else break;
1465         }
1466         imark = i;
1467         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1468         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1469       }
1470       if (idx) {
1471         *idx = idx_p = mat->rowindices;
1472         if (imark > -1) {
1473           for (i=0; i<imark; i++) {
1474             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1475           }
1476         } else {
1477           for (i=0; i<nzB; i++) {
1478             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479             else break;
1480           }
1481           imark = i;
1482         }
1483         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1484         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485       }
1486     } else {
1487       if (idx) *idx = 0;
1488       if (v)   *v   = 0;
1489     }
1490   }
1491   *nz  = nztot;
1492   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1493   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1503   baij->getrowactive = PETSC_FALSE;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1508 {
1509   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1510   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1511 
1512   PetscFunctionBegin;
1513   aA->getrow_utriangular = PETSC_TRUE;
1514   PetscFunctionReturn(0);
1515 }
1516 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1517 {
1518   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1519   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1520 
1521   PetscFunctionBegin;
1522   aA->getrow_utriangular = PETSC_FALSE;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1527 {
1528   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1533   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1538 {
1539   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1544   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1549    Input: isrow       - distributed (parallel),
1550           iscol_local - locally owned (seq)
1551 */
1552 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1553 {
1554   PetscErrorCode ierr;
1555   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1556   const PetscInt *ptr1,*ptr2;
1557 
1558   PetscFunctionBegin;
1559   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1560   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1561   if (sz1 > sz2) {
1562     *flg = PETSC_FALSE;
1563     PetscFunctionReturn(0);
1564   }
1565 
1566   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1567   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1568 
1569   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1570   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1571   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1572   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1573   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1574   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1575 
1576   nmatch=0;
1577   k     = 0;
1578   for (i=0; i<sz1; i++) {
1579     for (j=k; j<sz2; j++) {
1580       if (a1[i] == a2[j]) {
1581         k = j; nmatch++;
1582         break;
1583       }
1584     }
1585   }
1586   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1588   ierr = PetscFree(a1);CHKERRQ(ierr);
1589   ierr = PetscFree(a2);CHKERRQ(ierr);
1590   if (nmatch < sz1) {
1591     *flg = PETSC_FALSE;
1592   } else {
1593     *flg = PETSC_TRUE;
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1599 {
1600   PetscErrorCode ierr;
1601   IS             iscol_local;
1602   PetscInt       csize;
1603   PetscBool      isequal;
1604 
1605   PetscFunctionBegin;
1606   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1607   if (call == MAT_REUSE_MATRIX) {
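         /* reuse: fetch the gathered column IS that was composed with the submatrix
            during the original MAT_INITIAL_MATRIX call */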
1608     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1609     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1610   } else {
1611     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1612     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1613     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1614   }
1615 
1616   /* now call MatCreateSubMatrix_MPIBAIJ() */
1617   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1618   if (call == MAT_INITIAL_MATRIX) {
1619     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1620     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1626 {
1627   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1632   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1637 {
1638   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1639   Mat            A  = a->A,B = a->B;
1640   PetscErrorCode ierr;
1641   PetscReal      isend[5],irecv[5];
1642 
1643   PetscFunctionBegin;
1644   info->block_size = (PetscReal)matin->rmap->bs;
1645 
1646   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1685 {
1686   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1687   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1688   PetscErrorCode ierr;
1689 
1690   PetscFunctionBegin;
1691   switch (op) {
1692   case MAT_NEW_NONZERO_LOCATIONS:
1693   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1694   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1695   case MAT_KEEP_NONZERO_PATTERN:
1696   case MAT_SUBMAT_SINGLEIS:
1697   case MAT_NEW_NONZERO_LOCATION_ERR:
1698     MatCheckPreallocated(A,1);
1699     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1700     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1701     break;
1702   case MAT_ROW_ORIENTED:
1703     MatCheckPreallocated(A,1);
1704     a->roworiented = flg;
1705 
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1708     break;
1709   case MAT_NEW_DIAGONALS:
1710     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1711     break;
1712   case MAT_IGNORE_OFF_PROC_ENTRIES:
1713     a->donotstash = flg;
1714     break;
1715   case MAT_USE_HASH_TABLE:
1716     a->ht_flag = flg;
1717     break;
1718   case MAT_HERMITIAN:
1719     MatCheckPreallocated(A,1);
1720     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1721     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1722 #if defined(PETSC_USE_COMPLEX)
1723     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1724 #endif
1725     break;
1726   case MAT_SPD:
1727     A->spd_set = PETSC_TRUE;
1728     A->spd     = flg;
1729     if (flg) {
1730       A->symmetric                  = PETSC_TRUE;
1731       A->structurally_symmetric     = PETSC_TRUE;
1732       A->symmetric_set              = PETSC_TRUE;
1733       A->structurally_symmetric_set = PETSC_TRUE;
1734     }
1735     break;
1736   case MAT_SYMMETRIC:
1737     MatCheckPreallocated(A,1);
1738     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1739     break;
1740   case MAT_STRUCTURALLY_SYMMETRIC:
1741     MatCheckPreallocated(A,1);
1742     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_SYMMETRY_ETERNAL:
1745     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1746     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1747     break;
1748   case MAT_IGNORE_LOWER_TRIANGULAR:
1749     aA->ignore_ltriangular = flg;
1750     break;
1751   case MAT_ERROR_LOWER_TRIANGULAR:
1752     aA->ignore_ltriangular = flg;
1753     break;
1754   case MAT_GETROW_UPPERTRIANGULAR:
1755     aA->getrow_utriangular = flg;
1756     break;
1757   default:
1758     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1759   }
1760   PetscFunctionReturn(0);
1761 }
1762 
1763 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
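       /* an SBAIJ matrix is symmetric, so its transpose is simply a copy */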
1768   if (reuse == MAT_INITIAL_MATRIX) {
1769     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1770   } else if (reuse == MAT_REUSE_MATRIX) {
1771     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1777 {
1778   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1779   Mat            a     = baij->A, b=baij->B;
1780   PetscErrorCode ierr;
1781   PetscInt       nv,m,n;
1782   PetscBool      flg;
1783 
1784   PetscFunctionBegin;
1785   if (ll != rr) {
1786     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1787     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1788   }
1789   if (!ll) PetscFunctionReturn(0);
1790 
1791   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1792   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1793 
1794   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1795   if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1796 
1797   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1798 
1799   /* left diagonalscale the off-diagonal part */
1800   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1801 
1802   /* scale the diagonal part */
1803   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1804 
1805   /* right diagonalscale the off-diagonal part */
1806   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1807   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1812 {
1813   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1822 
1823 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1824 {
1825   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1826   Mat            a,b,c,d;
1827   PetscBool      flg;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   a = matA->A; b = matA->B;
1832   c = matB->A; d = matB->B;
1833 
1834   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1835   if (flg) {
1836     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1837   }
1838   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1843 {
1844   PetscErrorCode ierr;
1845   PetscBool      isbaij;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1849   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1850   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1851   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1852     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1853     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1854     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1855   } else {
1856     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1857     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1858 
1859     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1860     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1861   }
1862   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1867 {
1868   PetscErrorCode ierr;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1876 {
1877   PetscErrorCode ierr;
1878   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1879   PetscBLASInt   bnz,one=1;
1880   Mat_SeqSBAIJ   *xa,*ya;
1881   Mat_SeqBAIJ    *xb,*yb;
1882 
1883   PetscFunctionBegin;
1884   if (str == SAME_NONZERO_PATTERN) {
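         /* identical nonzero layout: add the raw stored value arrays of the diagonal
            (SeqSBAIJ) and off-diagonal (SeqBAIJ) blocks directly with BLAS axpy */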
1885     PetscScalar alpha = a;
1886     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1887     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1888     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1889     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1890     xb   = (Mat_SeqBAIJ*)xx->B->data;
1891     yb   = (Mat_SeqBAIJ*)yy->B->data;
1892     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1893     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1894     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1895   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1896     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1897     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1898     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1899   } else {
1900     Mat      B;
1901     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1902     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1903     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1904     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1905     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1906     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1907     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1908     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1909     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1910     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1911     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1912     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1913     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1914     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1915     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1916     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1917     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1918     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1919     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1920     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1921   }
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1926 {
1927   PetscErrorCode ierr;
1928   PetscInt       i;
1929   PetscBool      flg;
1930 
1931   PetscFunctionBegin;
1932   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1933   for (i=0; i<n; i++) {
1934     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1935     if (!flg) {
1936       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1943 {
1944   PetscErrorCode ierr;
1945   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1946   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1947 
1948   PetscFunctionBegin;
1949   if (!Y->preallocated) {
1950     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1951   } else if (!aij->nz) {
1952     PetscInt nonew = aij->nonew;
1953     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1954     aij->nonew = nonew;
1955   }
1956   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1961 {
1962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1967   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1968   if (d) {
1969     PetscInt rstart;
1970     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1971     *d += rstart/A->rmap->bs;
1972 
1973   }
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1978 {
1979   PetscFunctionBegin;
1980   *a = ((Mat_MPISBAIJ*)A->data)->A;
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 /* -------------------------------------------------------------------*/
1985 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1986                                        MatGetRow_MPISBAIJ,
1987                                        MatRestoreRow_MPISBAIJ,
1988                                        MatMult_MPISBAIJ,
1989                                /*  4*/ MatMultAdd_MPISBAIJ,
1990                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1991                                        MatMultAdd_MPISBAIJ,
1992                                        0,
1993                                        0,
1994                                        0,
1995                                /* 10*/ 0,
1996                                        0,
1997                                        0,
1998                                        MatSOR_MPISBAIJ,
1999                                        MatTranspose_MPISBAIJ,
2000                                /* 15*/ MatGetInfo_MPISBAIJ,
2001                                        MatEqual_MPISBAIJ,
2002                                        MatGetDiagonal_MPISBAIJ,
2003                                        MatDiagonalScale_MPISBAIJ,
2004                                        MatNorm_MPISBAIJ,
2005                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
2006                                        MatAssemblyEnd_MPISBAIJ,
2007                                        MatSetOption_MPISBAIJ,
2008                                        MatZeroEntries_MPISBAIJ,
2009                                /* 24*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /* 29*/ MatSetUp_MPISBAIJ,
2015                                        0,
2016                                        0,
2017                                        MatGetDiagonalBlock_MPISBAIJ,
2018                                        0,
2019                                /* 34*/ MatDuplicate_MPISBAIJ,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /* 39*/ MatAXPY_MPISBAIJ,
2025                                        MatCreateSubMatrices_MPISBAIJ,
2026                                        MatIncreaseOverlap_MPISBAIJ,
2027                                        MatGetValues_MPISBAIJ,
2028                                        MatCopy_MPISBAIJ,
2029                                /* 44*/ 0,
2030                                        MatScale_MPISBAIJ,
2031                                        MatShift_MPISBAIJ,
2032                                        0,
2033                                        0,
2034                                /* 49*/ 0,
2035                                        0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                /* 54*/ 0,
2040                                        0,
2041                                        MatSetUnfactored_MPISBAIJ,
2042                                        0,
2043                                        MatSetValuesBlocked_MPISBAIJ,
2044                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2045                                        0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                /* 64*/ 0,
2050                                        0,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2055                                        0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                /* 74*/ 0,
2060                                        0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                /* 79*/ 0,
2065                                        0,
2066                                        0,
2067                                        0,
2068                                        MatLoad_MPISBAIJ,
2069                                /* 84*/ 0,
2070                                        0,
2071                                        0,
2072                                        0,
2073                                        0,
2074                                /* 89*/ 0,
2075                                        0,
2076                                        0,
2077                                        0,
2078                                        0,
2079                                /* 94*/ 0,
2080                                        0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                /* 99*/ 0,
2085                                        0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                /*104*/ 0,
2090                                        MatRealPart_MPISBAIJ,
2091                                        MatImaginaryPart_MPISBAIJ,
2092                                        MatGetRowUpperTriangular_MPISBAIJ,
2093                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2094                                /*109*/ 0,
2095                                        0,
2096                                        0,
2097                                        0,
2098                                        MatMissingDiagonal_MPISBAIJ,
2099                                /*114*/ 0,
2100                                        0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                /*119*/ 0,
2105                                        0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                /*124*/ 0,
2110                                        0,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                /*129*/ 0,
2115                                        0,
2116                                        0,
2117                                        0,
2118                                        0,
2119                                /*134*/ 0,
2120                                        0,
2121                                        0,
2122                                        0,
2123                                        0,
2124                                /*139*/ MatSetBlockSizes_Default,
2125                                        0,
2126                                        0,
2127                                        0,
2128                                        0,
2129                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2130 };
2131 
2132 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2133 {
2134   Mat_MPISBAIJ   *b;
2135   PetscErrorCode ierr;
2136   PetscInt       i,mbs,Mbs;
2137   PetscMPIInt    size;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2141   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2142   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2143   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2144 
2145   b   = (Mat_MPISBAIJ*)B->data;
2146   mbs = B->rmap->n/bs;
2147   Mbs = B->rmap->N/bs;
2148   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2149 
2150   B->rmap->bs = bs;
2151   b->bs2      = bs*bs;
2152   b->mbs      = mbs;
2153   b->Mbs      = Mbs;
2154   b->nbs      = B->cmap->n/bs;
2155   b->Nbs      = B->cmap->N/bs;
2156 
2157   for (i=0; i<=b->size; i++) {
2158     b->rangebs[i] = B->rmap->range[i]/bs;
2159   }
2160   b->rstartbs = B->rmap->rstart/bs;
2161   b->rendbs   = B->rmap->rend/bs;
2162 
2163   b->cstartbs = B->cmap->rstart/bs;
2164   b->cendbs   = B->cmap->rend/bs;
2165 
2166 #if defined(PETSC_USE_CTABLE)
2167   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2168 #else
2169   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2170 #endif
2171   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2172   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2173   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2174   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2175   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2176   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2177   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2178   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2179   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2180 
2181   /* Because the off-diagonal block b->B may have been resized, we simply destroy it and create a new one each time */
2182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2183   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2184   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2185   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2186   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2188 
2189   if (!B->preallocated) {
2190     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2191     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2192     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2193     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2194     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2195   }
2196 
2197   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2198   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2199 
2200   B->preallocated  = PETSC_TRUE;
2201   B->was_assembled = PETSC_FALSE;
2202   B->assembled     = PETSC_FALSE;
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2207 {
2208   PetscInt       m,rstart,cstart,cend;
2209   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2210   const PetscInt *JJ    =0;
2211   PetscScalar    *values=0;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2216   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2217   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2218   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2219   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2220   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2221   m      = B->rmap->n/bs;
2222   rstart = B->rmap->rstart/bs;
2223   cstart = B->cmap->rstart/bs;
2224   cend   = B->cmap->rend/bs;
2225 
2226   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2227   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
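       /* count, per block row, the columns inside [cstart,cend) (diagonal block)
          versus outside it (off-diagonal block); each row of jj is assumed sorted */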
2228   for (i=0; i<m; i++) {
2229     nz = ii[i+1] - ii[i];
2230     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2231     nz_max = PetscMax(nz_max,nz);
2232     JJ     = jj + ii[i];
2233     for (j=0; j<nz; j++) {
2234       if (*JJ >= cstart) break;
2235       JJ++;
2236     }
2237     d = 0;
2238     for (; j<nz; j++) {
2239       if (*JJ++ >= cend) break;
2240       d++;
2241     }
2242     d_nnz[i] = d;
2243     o_nnz[i] = nz - d;
2244   }
2245   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2246   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2247 
2248   values = (PetscScalar*)V;
2249   if (!values) {
2250     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2251     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2252   }
2253   for (i=0; i<m; i++) {
2254     PetscInt          row    = i + rstart;
2255     PetscInt          ncols  = ii[i+1] - ii[i];
2256     const PetscInt    *icols = jj + ii[i];
2257     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2258     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2259   }
2260 
2261   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2262   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2263   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2264   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 /*MC
2269    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2270    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2271    the matrix is stored.
2272 
2273   For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian, call
2274   MatSetOption(Mat,MAT_HERMITIAN,PETSC_TRUE) after the matrix has been assembled.
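
       A minimal usage sketch (comm, M, bs and the preallocation counts are
       placeholders, not prescribed values):
     .vb
        Mat A;
        MatCreate(comm,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
        MatSetType(A,MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
        /* set entries in the upper triangle only, then assemble */
        MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
        MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     .ve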
2275 
2276    Options Database Keys:
2277 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2278 
2279   Level: beginner
2280 
2281 .seealso: MatCreateMPISBAIJ
2282 M*/
2283 
2284 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2285 {
2286   Mat_MPISBAIJ   *b;
2287   PetscErrorCode ierr;
2288   PetscBool      flg = PETSC_FALSE;
2289 
2290   PetscFunctionBegin;
2291   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2292   B->data = (void*)b;
2293   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2294 
2295   B->ops->destroy = MatDestroy_MPISBAIJ;
2296   B->ops->view    = MatView_MPISBAIJ;
2297   B->assembled    = PETSC_FALSE;
2298   B->insertmode   = NOT_SET_VALUES;
2299 
2300   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2301   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2302 
2303   /* build local table of row and column ownerships */
2304   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2305 
2306   /* build cache for off array entries formed */
2307   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2308 
2309   b->donotstash  = PETSC_FALSE;
2310   b->colmap      = NULL;
2311   b->garray      = NULL;
2312   b->roworiented = PETSC_TRUE;
2313 
2314   /* stuff used in block assembly */
2315   b->barray = 0;
2316 
2317   /* stuff used for matrix vector multiply */
2318   b->lvec    = 0;
2319   b->Mvctx   = 0;
2320   b->slvec0  = 0;
2321   b->slvec0b = 0;
2322   b->slvec1  = 0;
2323   b->slvec1a = 0;
2324   b->slvec1b = 0;
2325   b->sMvctx  = 0;
2326 
2327   /* stuff for MatGetRow() */
2328   b->rowindices   = 0;
2329   b->rowvalues    = 0;
2330   b->getrowactive = PETSC_FALSE;
2331 
2332   /* hash table stuff */
2333   b->ht           = 0;
2334   b->hd           = 0;
2335   b->ht_size      = 0;
2336   b->ht_flag      = PETSC_FALSE;
2337   b->ht_fact      = 0;
2338   b->ht_total_ct  = 0;
2339   b->ht_insert_ct = 0;
2340 
2341   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2342   b->ijonly = PETSC_FALSE;
2343 
2344   b->in_loc = 0;
2345   b->v_loc  = 0;
2346   b->n_loc  = 0;
2347 
2348   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2350   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2351   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2352 #if defined(PETSC_HAVE_ELEMENTAL)
2353   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2354 #endif
2355   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2356   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2357 
2358   B->symmetric                  = PETSC_TRUE;
2359   B->structurally_symmetric     = PETSC_TRUE;
2360   B->symmetric_set              = PETSC_TRUE;
2361   B->structurally_symmetric_set = PETSC_TRUE;
2362   B->symmetric_eternal          = PETSC_TRUE;
2363 
2364   B->hermitian                  = PETSC_FALSE;
2365   B->hermitian_set              = PETSC_FALSE;
2366 
2367   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2368   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2369   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2370   if (flg) {
2371     PetscReal fact = 1.39;
2372     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2373     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2374     if (fact <= 1.0) fact = 1.39;
2375     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2376     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2377   }
2378   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 /*MC
2383    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2384 
2385    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2386    and MATMPISBAIJ otherwise.
2387 
2388    Options Database Keys:
2389 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2390 
2391   Level: beginner
2392 
2393 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2394 M*/
2395 
2396 /*@C
2397    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2398    the user should preallocate the matrix storage by setting the parameters
2399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2400    performance can be increased by more than a factor of 50.
2401 
2402    Collective on Mat
2403 
2404    Input Parameters:
2405 +  B - the matrix
2406 .  bs   - size of the blocks; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
2407           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2408 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2409            submatrix  (same for all local rows)
2410 .  d_nnz - array containing the number of block nonzeros in the various block rows
2411            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2412            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2413            for the diagonal entry and set a value even if it is zero.
2414 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2415            submatrix (same for all local rows).
2416 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2417            off-diagonal portion of the local submatrix that is right of the diagonal
2418            (possibly different for each block row) or NULL.
2419
2421    Options Database Keys:
2422 +   -mat_no_unroll - uses code that does not unroll the loops in the
2423                      block calculations (much slower)
2424 -   -mat_block_size - size of the blocks to use
2425 
2426    Notes:
2427 
2428    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2429    then it must be used on all processors that share the object for that argument.
2430 
2431    If the *_nnz parameter is given then the *_nz parameter is ignored.
2432 
2433    Storage Information:
2434    For a square global matrix we define each processor's diagonal portion
2435    to be its local rows and the corresponding columns (a square submatrix);
2436    each processor's off-diagonal portion encompasses the remainder of the
2437    local matrix (a rectangular submatrix).
2438 
2439    The user can specify preallocated storage for the diagonal part of
2440    the local submatrix with either d_nz or d_nnz (not both).  Set
2441    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2442    memory allocation.  Likewise, specify preallocated storage for the
2443    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2444 
2445    You can call MatGetInfo() to get information on how effective the preallocation was;
2446    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2447    You can also run with the option -info and look for messages with the string
2448    malloc in them to see if additional memory allocation was needed.
2449 
2450    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2451    the figure below we depict these three local rows and all columns (0-11).
2452 
2453 .vb
2454            0 1 2 3 4 5 6 7 8 9 10 11
2455           --------------------------
2456    row 3  |. . . d d d o o o o  o  o
2457    row 4  |. . . d d d o o o o  o  o
2458    row 5  |. . . d d d o o o o  o  o
2459           --------------------------
2460 .ve
2461 
2462    Thus, any entries in the d locations are stored in the d (diagonal)
2463    submatrix, and any entries in the o locations are stored in the
2464    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2465    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2466 
2467    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2468    plus the diagonal part of the d matrix,
2469    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2470 
2471    In general, for PDE problems in which most nonzeros are near the diagonal,
2472    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2473    or you will get TERRIBLE performance; see the users' manual chapter on
2474    matrices.
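
        As a concrete sketch for the three rows depicted above with bs=1: only the
        upper triangular part of each d row is stored, so a matching preallocation is
     .vb
        PetscInt d_nnz[] = {3,2,1};  /* upper triangular d entries of rows 3,4,5 */
        PetscInt o_nnz[] = {6,6,6};  /* o entries, all right of the diagonal */
        MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
     .ve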
2475 
2476    Level: intermediate
2477 
2478 .keywords: matrix, block, aij, compressed row, sparse, parallel
2479 
2480 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2481 @*/
2482 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2483 {
2484   PetscErrorCode ierr;
2485 
2486   PetscFunctionBegin;
2487   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2488   PetscValidType(B,1);
2489   PetscValidLogicalCollectiveInt(B,bs,2);
2490   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 /*@C
2495    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2496    (block compressed row).  For good matrix assembly performance
2497    the user should preallocate the matrix storage by setting the parameters
2498    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2499    performance can be increased by more than a factor of 50.
2500 
2501    Collective on MPI_Comm
2502 
2503    Input Parameters:
2504 +  comm - MPI communicator
2505 .  bs   - size of the blocks; the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize, but the row
2506           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2507 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2508            This value should be the same as the local size used in creating the
2509            y vector for the matrix-vector product y = Ax.
2510 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2511            This value should be the same as the local size used in creating the
2512            x vector for the matrix-vector product y = Ax.
2513 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2514 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2515 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2516            submatrix  (same for all local rows)
2517 .  d_nnz - array containing the number of block nonzeros in the various block rows
2518            in the upper triangular portion of the diagonal portion of the local submatrix
2519            (possibly different for each block row) or NULL.
2520            If you plan to factor the matrix you must leave room for the diagonal entry and
2521            set its value even if it is zero.
2522 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2523            submatrix (same for all local rows).
2524 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2525            off-diagonal portion of the local submatrix (possibly different for
2526            each block row) or NULL.
2527 
2528    Output Parameter:
2529 .  A - the matrix
2530 
2531    Options Database Keys:
2532 +   -mat_no_unroll - uses code that does not unroll the loops in the
2533                      block calculations (much slower)
2534 .   -mat_block_size - size of the blocks to use
2535 -   -mat_mpi - use the parallel matrix data structures even on one processor
2536                (defaults to using SeqSBAIJ format on one processor)
2537 
2538    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2539    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2540    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2541 
2542    Notes:
2543    The number of rows and columns must be divisible by blocksize.
2544    This matrix type does not support complex Hermitian operations.
2545 
2546    The user MUST specify either the local or global matrix dimensions
2547    (possibly both).
2548 
2549    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2550    then it must be used on all processors that share the object for that argument.
2551 
2552    If the *_nnz parameter is given then the *_nz parameter is ignored.
2553 
2554    Storage Information:
2555    For a square global matrix we define each processor's diagonal portion
2556    to be its local rows and the corresponding columns (a square submatrix);
2557    each processor's off-diagonal portion encompasses the remainder of the
2558    local matrix (a rectangular submatrix).
2559 
2560    The user can specify preallocated storage for the diagonal part of
2561    the local submatrix with either d_nz or d_nnz (not both).  Set
2562    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2563    memory allocation.  Likewise, specify preallocated storage for the
2564    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2565 
2566    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2567    the figure below we depict these three local rows and all columns (0-11).
2568 
2569 .vb
2570            0 1 2 3 4 5 6 7 8 9 10 11
2571           --------------------------
2572    row 3  |. . . d d d o o o o  o  o
2573    row 4  |. . . d d d o o o o  o  o
2574    row 5  |. . . d d d o o o o  o  o
2575           --------------------------
2576 .ve
2577 
2578    Thus, any entries in the d locations are stored in the d (diagonal)
2579    submatrix, and any entries in the o locations are stored in the
2580    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2581    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2582 
2583    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2584    plus the diagonal part of the d matrix,
2585    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2586    In general, for PDE problems in which most nonzeros are near the diagonal,
2587    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2588    or you will get TERRIBLE performance; see the users' manual chapter on
2589    matrices.
2590 
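   For the three local rows depicted above (bs=3, i.e. one local block row), the upper
   triangular part of the d portion holds one block and the o portion holds two blocks,
   so a sketch of the corresponding call on this process is d_nz=1, o_nz=2:
.vb
   MatCreateSBAIJ(comm,3,3,3,12,12,1,NULL,2,NULL,&A);
.ve
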
2591    Level: intermediate
2592 
2593 .keywords: matrix, block, aij, compressed row, sparse, parallel
2594 
2595 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2596 @*/
2598 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2599 {
2600   PetscErrorCode ierr;
2601   PetscMPIInt    size;
2602 
2603   PetscFunctionBegin;
2604   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2605   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2606   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2607   if (size > 1) {
2608     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2609     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2610   } else {
2611     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2612     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2613   }
2614   PetscFunctionReturn(0);
2615 }
2616 
2618 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2619 {
2620   Mat            mat;
2621   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2622   PetscErrorCode ierr;
2623   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2624   PetscScalar    *array;
2625 
2626   PetscFunctionBegin;
2627   *newmat = 0;
2628 
2629   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2630   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2631   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2632   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2633   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2634 
2635   mat->factortype   = matin->factortype;
2636   mat->preallocated = PETSC_TRUE;
2637   mat->assembled    = PETSC_TRUE;
2638   mat->insertmode   = NOT_SET_VALUES;
2639 
2640   a      = (Mat_MPISBAIJ*)mat->data;
2641   a->bs2 = oldmat->bs2;
2642   a->mbs = oldmat->mbs;
2643   a->nbs = oldmat->nbs;
2644   a->Mbs = oldmat->Mbs;
2645   a->Nbs = oldmat->Nbs;
2646 
2647   a->size         = oldmat->size;
2648   a->rank         = oldmat->rank;
2649   a->donotstash   = oldmat->donotstash;
2650   a->roworiented  = oldmat->roworiented;
2651   a->rowindices   = 0;
2652   a->rowvalues    = 0;
2653   a->getrowactive = PETSC_FALSE;
2654   a->barray       = 0;
2655   a->rstartbs     = oldmat->rstartbs;
2656   a->rendbs       = oldmat->rendbs;
2657   a->cstartbs     = oldmat->cstartbs;
2658   a->cendbs       = oldmat->cendbs;
2659 
2660   /* hash table stuff */
2661   a->ht           = 0;
2662   a->hd           = 0;
2663   a->ht_size      = 0;
2664   a->ht_flag      = oldmat->ht_flag;
2665   a->ht_fact      = oldmat->ht_fact;
2666   a->ht_total_ct  = 0;
2667   a->ht_insert_ct = 0;
2668 
2669   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2670   if (oldmat->colmap) {
2671 #if defined(PETSC_USE_CTABLE)
2672     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2673 #else
2674     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2675     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2676     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2677 #endif
2678   } else a->colmap = 0;
2679 
2680   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2681     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2682     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2683     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2684   } else a->garray = 0;
2685 
2686   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2687   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2688   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2689   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2690   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2691 
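  /* slvec0/slvec1 hold the owned part (bs*mbs entries) followed by the needed ghost values;
     the slvec0b/slvec1a/slvec1b vectors created below are sequential vectors that alias
     pieces of those arrays rather than owning storage of their own */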
2692   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2693   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2694   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2695   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2696 
2697   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2698   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2699   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2700   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2701   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2702   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2703   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2704   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2706   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2708   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2709   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2710 
2711   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not implemented yet, so instead use the lazy trick: reference and share the original scatter */
2712   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2713   a->sMvctx = oldmat->sMvctx;
2714   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2715 
2716   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2717   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2718   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2719   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2720   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2721   *newmat = mat;
2722   PetscFunctionReturn(0);
2723 }
2724 
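/*
   A minimal sketch of reaching the duplication kernel above through the public interface:

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
*/
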
2725 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2726 {
2727   PetscErrorCode ierr;
2728   PetscInt       i,nz,j,rstart,rend;
2729   PetscScalar    *vals,*buf;
2730   MPI_Comm       comm;
2731   MPI_Status     status;
2732   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2733   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2734   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2735   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2736   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2737   PetscInt       dcount,kmax,k,nzcount,tmp;
2738   int            fd;
2739   PetscBool      isbinary;
2740 
2741   PetscFunctionBegin;
2742   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2743   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2744 
2745   /* force binary viewer to load .info file if it has not yet done so */
2746   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2747   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2748   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2749   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2750   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2751   if (bs < 0) bs = 1;
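  /* e.g. run with -matload_block_size 2 to read the file into 2x2 blocks (illustrative value) */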
2752 
2753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2754   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2755   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2756   if (!rank) {
2757     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2758     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2759     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2760   }
2761 
2762   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2763   M    = header[1];
2764   N    = header[2];
2765 
2766   /* If global sizes are set, check if they are consistent with that given in the file */
2767   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2768   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2769 
2770   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2771 
2772   /*
2773      This code adds extra rows to make sure the number of rows is
2774      divisible by the blocksize
2775   */
2776   Mbs        = M/bs;
2777   extra_rows = bs - M + bs*(Mbs);
2778   if (extra_rows == bs) extra_rows = 0;
2779   else                  Mbs++;
2780   if (extra_rows && !rank) {
2781     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2782   }
2783 
2784   /* determine ownership of all rows */
2785   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2786     mbs = Mbs/size + ((Mbs % size) > rank);
2787     m   = mbs*bs;
2788   } else { /* User Set */
2789     m   = newmat->rmap->n;
2790     mbs = m/bs;
2791   }
2792   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2793   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2794   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2795   rowners[0] = 0;
2796   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2797   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2798   rstart = rowners[rank];
2799   rend   = rowners[rank+1];
2800 
2801   /* distribute row lengths to all processors */
2802   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2803   if (!rank) {
2804     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2805     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2806     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2807     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2808     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2809     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2810     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2811   } else {
2812     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2813   }
2814 
2815   if (!rank) {   /* procs[0] */
2816     /* calculate the number of nonzeros on each processor */
2817     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2818     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2819     for (i=0; i<size; i++) {
2820       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2821         procsnz[i] += rowlengths[j];
2822       }
2823     }
2824     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2825 
2826     /* determine max buffer needed and allocate it */
2827     maxnz = 0;
2828     for (i=0; i<size; i++) {
2829       maxnz = PetscMax(maxnz,procsnz[i]);
2830     }
2831     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2832 
2833     /* read in my part of the matrix column indices  */
2834     nz     = procsnz[0];
2835     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2836     mycols = ibuf;
2837     if (size == 1) nz -= extra_rows;
2838     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2839     if (size == 1) {
2840       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2841     }
2842 
2843     /* read in the column indices for all other processors (except the last) and ship them off */
2844     for (i=1; i<size-1; i++) {
2845       nz   = procsnz[i];
2846       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2847       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2848     }
2849     /* read in the column indices for the last proc */
2850     if (size != 1) {
2851       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2852       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2853       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2854       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2855     }
2856     ierr = PetscFree(cols);CHKERRQ(ierr);
2857   } else {  /* procs[i], i>0 */
2858     /* determine buffer space needed for message */
2859     nz = 0;
2860     for (i=0; i<m; i++) nz += locrowlens[i];
2861     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2862     mycols = ibuf;
2863     /* receive message of column indices */
2864     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2865     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2866     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2867   }
2868 
2869   /* loop over local rows, determining number of off diagonal entries */
2870   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2871   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2872   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2873   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2874   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2875   rowcount = 0;
2876   nzcount  = 0;
2877   for (i=0; i<mbs; i++) {
2878     dcount  = 0;
2879     odcount = 0;
2880     for (j=0; j<bs; j++) {
2881       kmax = locrowlens[rowcount];
2882       for (k=0; k<kmax; k++) {
2883         tmp = mycols[nzcount++]/bs; /* block col. index */
2884         if (!mask[tmp]) {
2885           mask[tmp] = 1;
2886           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2887           else masked1[dcount++] = tmp; /* entry in diag portion */
2888         }
2889       }
2890       rowcount++;
2891     }
2892 
2893     dlens[i]  = dcount;  /* d_nzz[i] */
2894     odlens[i] = odcount; /* o_nzz[i] */
2895 
2896     /* zero out the mask elements we set */
2897     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2898     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2899   }
2900   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2901   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2902   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2903 
2904   if (!rank) {
2905     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2906     /* read in my part of the matrix numerical values  */
2907     nz     = procsnz[0];
2908     vals   = buf;
2909     mycols = ibuf;
2910     if (size == 1) nz -= extra_rows;
2911     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2912     if (size == 1) {
2913       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2914     }
2915 
2916     /* insert into matrix */
2917     jj = rstart*bs;
2918     for (i=0; i<m; i++) {
2919       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2920       mycols += locrowlens[i];
2921       vals   += locrowlens[i];
2922       jj++;
2923     }
2924 
2925     /* read in other processors (except the last one) and ship out */
2926     for (i=1; i<size-1; i++) {
2927       nz   = procsnz[i];
2928       vals = buf;
2929       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2930       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2931     }
2932     /* the last proc */
2933     if (size != 1) {
2934       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2935       vals = buf;
2936       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2937       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2938       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2939     }
2940     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2941 
2942   } else {
2943     /* receive numeric values */
2944     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2945 
2946     /* receive message of values */
2947     vals   = buf;
2948     mycols = ibuf;
2949     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2950     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2951     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2952 
2953     /* insert into matrix */
2954     jj = rstart*bs;
2955     for (i=0; i<m; i++) {
2956       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2957       mycols += locrowlens[i];
2958       vals   += locrowlens[i];
2959       jj++;
2960     }
2961   }
2962 
2963   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2964   ierr = PetscFree(buf);CHKERRQ(ierr);
2965   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2966   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2967   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2968   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2969   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2970   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   PetscFunctionReturn(0);
2972 }
2973 
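/*
   A minimal sketch of driving the loader above through the public interface
   (the file name "matrix.dat" is illustrative):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/
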
2974 /*XXXXX@
2975    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2976 
2977    Input Parameters:
2978 .  mat  - the matrix
2979 .  fact - factor
2980 
2981    Not Collective on Mat, each process can have a different hash factor
2982 
2983    Level: advanced
2984 
2985   Notes:
2986    This can also be set by the command line option: -mat_use_hash_table fact
2987 
2988 .keywords: matrix, hashtable, factor, HT
2989 
2990 .seealso: MatSetOption()
2991 @XXXXX*/
2992 
2994 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2995 {
2996   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2997   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2998   PetscReal      atmp;
2999   PetscReal      *work,*svalues,*rvalues;
3000   PetscErrorCode ierr;
3001   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
3002   PetscMPIInt    rank,size;
3003   PetscInt       *rowners_bs,dest,count,source;
3004   PetscScalar    *va;
3005   MatScalar      *ba;
3006   MPI_Status     stat;
3007 
3008   PetscFunctionBegin;
3009   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3010   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3011   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3012 
3013   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3014   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3015 
3016   bs  = A->rmap->bs;
3017   mbs = a->mbs;
3018   Mbs = a->Mbs;
3019   ba  = b->a;
3020   bi  = b->i;
3021   bj  = b->j;
3022 
3023   /* find ownerships */
3024   rowners_bs = A->rmap->range;
3025 
3026   /* each proc creates an array to be distributed */
3027   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3028   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
3029 
3030   /* row_max for B */
3031   if (rank != size-1) {
3032     for (i=0; i<mbs; i++) {
3033       ncols = bi[1] - bi[0]; bi++;
3034       brow  = bs*i;
3035       for (j=0; j<ncols; j++) {
3036         bcol = bs*(*bj);
3037         for (kcol=0; kcol<bs; kcol++) {
3038           col  = bcol + kcol;                /* local col index */
3039           col += rowners_bs[rank+1];      /* global col index */
3040           for (krow=0; krow<bs; krow++) {
3041             atmp = PetscAbsScalar(*ba); ba++;
3042             row  = brow + krow;   /* local row index */
3043             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3044             if (work[col] < atmp) work[col] = atmp;
3045           }
3046         }
3047         bj++;
3048       }
3049     }
3050 
3051     /* send values to its owners */
3052     for (dest=rank+1; dest<size; dest++) {
3053       svalues = work + rowners_bs[dest];
3054       count   = rowners_bs[dest+1]-rowners_bs[dest];
3055       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3056     }
3057   }
3058 
3059   /* receive values */
3060   if (rank) {
3061     rvalues = work;
3062     count   = rowners_bs[rank+1]-rowners_bs[rank];
3063     for (source=0; source<rank; source++) {
3064       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3065       /* process values */
3066       for (i=0; i<count; i++) {
3067         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3068       }
3069     }
3070   }
3071 
3072   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3073   ierr = PetscFree(work);CHKERRQ(ierr);
3074   PetscFunctionReturn(0);
3075 }
3076 
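/*
   A minimal sketch of reaching the kernel above through the public interface (idx must
   be NULL here, since returning the column indices is not supported):

     Vec v;
     ierr = MatCreateVecs(A,NULL,&v);CHKERRQ(ierr);   /* left vector, conforming to the rows of A */
     ierr = MatGetRowMaxAbs(A,v,NULL);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/
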
3077 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3078 {
3079   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3080   PetscErrorCode    ierr;
3081   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3082   PetscScalar       *x,*ptr,*from;
3083   Vec               bb1;
3084   const PetscScalar *b;
3085 
3086   PetscFunctionBegin;
3087   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3088   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3089 
3090   if (flag == SOR_APPLY_UPPER) {
3091     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3092     PetscFunctionReturn(0);
3093   }
3094 
3095   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3096     if (flag & SOR_ZERO_INITIAL_GUESS) {
3097       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3098       its--;
3099     }
3100 
3101     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3102     while (its--) {
3103 
3104       /* lower triangular part: slvec0b = - B^T*xx */
3105       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3106 
3107       /* copy xx into slvec0a */
3108       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3109       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3110       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3111       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3112 
3113       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3114 
3115       /* copy bb into slvec1a */
3116       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3117       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3118       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3119       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3120 
3121       /* set slvec1b = 0 */
3122       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3123 
3124       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3126       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3127       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3128 
3129       /* upper triangular part: bb1 = bb1 - B*x */
3130       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3131 
3132       /* local diagonal sweep */
3133       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3134     }
3135     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3136   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3137     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3138   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3139     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3140   } else if (flag & SOR_EISENSTAT) {
3141     Vec               xx1;
3142     PetscBool         hasop;
3143     const PetscScalar *diag;
3144     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3145     PetscInt          i,n;
3146 
3147     if (!mat->xx1) {
3148       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3149       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3150     }
3151     xx1 = mat->xx1;
3152     bb1 = mat->bb1;
3153 
3154     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3155 
3156     if (!mat->diag) {
3157       /* this is wrong for same matrix with new nonzero values */
3158       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3159       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3160     }
3161     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3162 
3163     if (hasop) {
3164       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3165       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3166     } else {
3167       /*
3168           These two lines are replaced by code that may be a bit faster for a good compiler
3169       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3170       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3171       */
3172       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3173       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3174       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3175       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3176       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3177       if (omega == 1.0) {
3178         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3179         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3180       } else {
3181         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3182         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3183       }
3184       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3185       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3186       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3187       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3188     }
3189 
3190     /* multiply off-diagonal portion of matrix */
3191     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3192     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3193     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3194     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3195     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3196     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3197     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3198     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3199     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3200     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3201 
3202     /* local sweep */
3203     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3204     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3205   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3206   PetscFunctionReturn(0);
3207 }
3208 
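/*
   The SOR kernel above is normally exercised through PCSOR; a sketch, assuming ksp
   already has its operators set:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/
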
3209 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3210 {
3211   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3212   PetscErrorCode ierr;
3213   Vec            lvec1,bb1;
3214 
3215   PetscFunctionBegin;
3216   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3217   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3218 
3219   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3220     if (flag & SOR_ZERO_INITIAL_GUESS) {
3221       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3222       its--;
3223     }
3224 
3225     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3226     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3227     while (its--) {
3228       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3229 
3230       /* lower diagonal part: bb1 = bb - B^T*xx */
3231       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3232       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3233 
3234       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3235       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3236       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3237 
3238       /* upper triangular part: bb1 = bb1 - B*x */
3239       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3240       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3241 
3242       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3243 
3244       /* diagonal sweep */
3245       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3246     }
3247     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3248     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3249   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 /*@
3254      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
3255          rows in standard CSR format.
3256 
3257    Collective on MPI_Comm
3258 
3259    Input Parameters:
3260 +  comm - MPI communicator
3261 .  bs - the block size, only a block size of 1 is supported
3262 .  m - number of local rows (Cannot be PETSC_DECIDE)
3263 .  n - This value should be the same as the local size used in creating the
3264        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
3265        it calculated if N is given). For square matrices n is almost always m.
3266 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3267 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3268 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in block row row-1 of the matrix
3269 .   j - column indices
3270 -   a - matrix values
3271 
3272    Output Parameter:
3273 .   mat - the matrix
3274 
3275    Level: intermediate
3276 
3277    Notes:
3278        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3279      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3280      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3281 
3282        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3283 
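       As a sketch with bs=1 on a single process, the symmetric matrix
.vb
          1 2 0
          2 3 4
          0 4 5
.ve
       can be created from its upper triangle (assuming only the upper triangular part,
       including the diagonal, is supplied) with
.vb
          PetscInt    i[] = {0,2,4,5};
          PetscInt    j[] = {0,1,1,2,2};
          PetscScalar a[] = {1,2,3,4,5};
          MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve
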
3284 .keywords: matrix, aij, compressed row, sparse, parallel
3285 
3286 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3287           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3288 @*/
3289 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3290 {
3291   PetscErrorCode ierr;
3292 
3294   PetscFunctionBegin;
3295   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3296   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3297   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3298   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3299   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3300   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3301   PetscFunctionReturn(0);
3302 }
3303 
3305 /*@C
3306    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ
3307    format using a CSR description of the local rows.
3308 
3309    Collective on MPI_Comm
3310 
3311    Input Parameters:
3312 +  B - the matrix
3313 .  bs - the block size
3314 .  i - the indices into j for the start of each local row (starts with zero)
3315 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3316 -  v - optional values in the matrix
3317 
3318    Level: developer
3319 
3320 .keywords: matrix, aij, compressed row, sparse, parallel
3321 
3322 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3323 @*/
3324 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3325 {
3326   PetscErrorCode ierr;
3327 
3328   PetscFunctionBegin;
3329   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3330   PetscFunctionReturn(0);
3331 }
3332 
3333 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3334 {
3335   PetscErrorCode ierr;
3336   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3337   PetscInt       *indx;
3338   PetscScalar    *values;
3339 
3340   PetscFunctionBegin;
3341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3342   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3343     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3344     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3345     PetscInt       *bindx,rmax=a->rmax,j;
3346 
3347     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3348     mbs = m/bs; Nbs = N/cbs;
3349     if (n == PETSC_DECIDE) {
3350       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3351     }
3352     /* Check sum(n) = Nbs */
3353     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3354     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);
3355 
3356     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3357     rstart -= mbs;
3358 
3359     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3360     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3361     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3362     for (i=0; i<mbs; i++) {
3363       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3364       nnz  = nnz/bs;
3365       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3366       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3367       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3368     }
3369     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3370     ierr = PetscFree(bindx);CHKERRQ(ierr);
3371 
3372     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3373     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3374     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3375     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3376     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3377     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3378   }
3379 
3380   /* numeric phase */
3381   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3382   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3383 
3384   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3385   for (i=0; i<m; i++) {
3386     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3387     Ii   = i + rstart;
3388     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3389     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3390   }
3391   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3392   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3393   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
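
/*
   The concatenation kernel above is normally reached through the dispatching routine;
   a sketch, where inmat is the sequential SBAIJ matrix held by each process:

     Mat outmat;
     ierr = MatCreateMPIMatConcatenateSeqMat(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
*/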
3396