xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bfcb38ea38335faa6e7f8d97f6bc6ce9aa2a1dd1)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 static PetscErrorCode MatConvert_MPISBAIJ_XAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
78     ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
79     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
80   } else B = *newmat;
81 
82   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
83   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
84     PetscInt          ncols;
85     const PetscInt    *row;
86     const PetscScalar *vals;
87 
88     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
89     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
90     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
91     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
92   }
93   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
94   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
95   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
96 
97   if (reuse == MAT_INPLACE_MATRIX) {
98     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
99   } else {
100     *newmat = B;
101   }
102   PetscFunctionReturn(0);
103 }
104 
105 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
106 {
107   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
108   PetscErrorCode ierr;
109 
110   PetscFunctionBegin;
111   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
112   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
113   PetscFunctionReturn(0);
114 }
115 
116 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
117 {
118   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
119   PetscErrorCode ierr;
120 
121   PetscFunctionBegin;
122   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
123   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
124   PetscFunctionReturn(0);
125 }
126 
/* Insert or add the scalar `value` at local point position (row,col) of the
   diagonal block A (SeqSBAIJ).  brow/bcol are the containing block indices,
   ridx/cidx the offsets inside the bs x bs block.  A short binary search
   narrows the window, then a linear scan finds the block column; if the block
   is absent and a->nonew allows it, the row is (re)allocated and later blocks
   are shifted up.  orow/ocol are global indices used only in error messages.
   Expands inside MatSetValues_MPISBAIJ(), which declares all the scratch
   variables (rp,ap,bap,low,high,t,_i,ii,N,nrow,rmax,bs,bs2,aa,ai,aj,...). */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
166 
/* Same as MatSetValues_SeqSBAIJ_A_Private() but targets the off-diagonal
   block B (SeqBAIJ): search the block row, update in place when the block
   column exists, otherwise (subject to b->nonew) reallocate the row, shift
   later blocks up and insert.  Uses the b*/B-named scratch variables declared
   in MatSetValues_MPISBAIJ(). */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
205 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.

   Locally owned entries go directly into the diagonal (A) or off-diagonal (B)
   sequential block via the macros above; off-process rows are filtered for
   lower-triangular entries and pushed into the stash for communication at
   assembly time.
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* grow the scratch arrays used to hold the filtered columns/values of
       an off-process row before stashing */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* colmap maps a global block column to (local block offset + 1);
               0 means the block column is not present in B */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to the unassembled
                 (global column index) representation */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* copy only the upper-triangular part of the row before stashing */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
326 
/* Insert or add one bs x bs block of values v at block position (row,col) of
   a SeqSBAIJ matrix.  orow/ocol are the global block indices, used only in
   error messages.  Lower-triangular blocks (col < row) are skipped or raise
   an error depending on a->ignore_ltriangular.  Stored blocks are
   column-major; when `roworiented` the strided jj loop transposes the
   row-major input on the fly. */
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];       /* block column indices of this block row */
  ap   = aa + bs2*ai[row];   /* values of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search to narrow the window, then a linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: update it in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;  /* silently drop new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy the new block in, transposing when the input is row oriented */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
423 
/*
   This routine is exactly duplicated in mpibaij.c

   Insert or add one bs x bs block of values v at block position (row,col) of
   a SeqBAIJ matrix (no triangular filtering here).  orow/ocol are global
   block indices used only in error messages.  Stored blocks are column-major;
   when `roworiented` the strided jj loop transposes the row-major input.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];       /* block column indices of this block row */
  ap   = aa + bs2*ai[row];   /* values of this block row */
  rmax = imax[row];
  nrow = ailen[row];
  low  = 0;
  high = nrow;
  value = v;
  /* binary search to narrow the window, then a linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: update it in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;  /* silently drop new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  for (ii=N; ii>=i; ii--) {
    rp[ii+1] = rp[ii];
    ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  if (N >= i) {
    ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
  }
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy the new block in, transposing when the input is row oriented */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
520 
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()

    Set m x n blocks of values (block indices im[], in[]) into the parallel
  SBAIJ matrix.  Each bs x bs block is copied into the scratch buffer
  baij->barray (unless the layout already matches), then routed to the
  diagonal (A) or off-diagonal (B) local block; off-process block rows are
  stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  /* NOTE(review): cstart/cend are taken from the ROW block ownership range
     (rstartbs/rendbs) — presumably valid because SBAIJ matrices are square;
     confirm against the layout setup */
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the bs x bs scratch block */
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive block rows of the input */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {  /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {  /* diagonal block (A) */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {  /* off-diagonal block (B) */
          if (mat->was_assembled) {
            /* colmap maps a global point column to (local point offset + 1) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new block column: revert to the unassembled representation */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {  /* off-process block row: stash it */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
628 
/* Fetch values at global point indices (idxm[], idxn[]) into v (row-major,
   v[i*n+j]).  Only locally owned rows may be queried; requesting a remote row
   is an error.  Columns whose block is not present in the local off-diagonal
   part are returned as 0.0. */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;  /* local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block: direct lookup in A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate the global block column through colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent locally -> entry is an (unstored) zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
669 
/* Compute a norm of the parallel symmetric matrix.  Supports NORM_FROBENIUS
   and (identically, by symmetry) NORM_1 / NORM_INFINITY; other norms error.
   Because only one triangle is stored, off-diagonal contributions are counted
   for both the row and the column they belong to. */
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: the sequential A holds the whole matrix */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      /* B's contribution is doubled for the unstored symmetric counterpart */
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat: accumulate |a_ij| into its column sum, and (for off-diagonal
         blocks) also into the row sum of the mirrored entry */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      /* Bmat: every entry contributes to both its column and its row sum */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;  /* garray maps local to global block column */
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
        ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
      }
      ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
744 
745 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749   PetscInt       nstash,reallocs;
750 
751   PetscFunctionBegin;
752   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
753 
754   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
755   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
756   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
757   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
758   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
759   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
/*
   Completes assembly of an MPISBAIJ matrix: drains the point and block
   stashes of received off-process entries, assembles the local A (diagonal)
   and B (off-diagonal) sequential blocks, coordinates disassembly across
   ranks, and on the first final assembly builds the multiply scatters.

   Collective on mat.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point (scalar-entry) stash: one message at a time until flg==0 */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values are bs2 scalars per entry, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    /* restore the orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD over was_assembled: true only if every rank was previously assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* release the MatGetRow() work space; it is re-allocated on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
862 
863 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
864 #include <petscdraw.h>
/*
   Views an MPISBAIJ matrix through an ASCII, draw, or socket viewer.
   For the INFO formats only per-rank summaries are printed; otherwise the
   whole matrix is gathered onto rank 0 (as a temporary MPISBAIJ with all
   rows on rank 0) and viewed there.  Collective on mat.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero counts and memory, printed in rank order */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs global point-row indices of block row i */
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one point column of the bs x bs block at a time; 'a' walks the values */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* on rank 0 the gathered matrix lives entirely in its local diagonal block */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
984 
985 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
986 {
987   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
988   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
989   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
990   PetscErrorCode ierr;
991   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
992   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
993   int            fd;
994   PetscScalar    *column_values;
995   FILE           *file;
996   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
997   PetscInt       message_count,flowcontrolcount;
998 
999   PetscFunctionBegin;
1000   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1001   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1002   nz   = bs2*(A->nz + B->nz);
1003   rlen = mat->rmap->n;
1004   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1005   if (!rank) {
1006     header[0] = MAT_FILE_CLASSID;
1007     header[1] = mat->rmap->N;
1008     header[2] = mat->cmap->N;
1009 
1010     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1011     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1012     /* get largest number of rows any processor has */
1013     range = mat->rmap->range;
1014     for (i=1; i<size; i++) {
1015       rlen = PetscMax(rlen,range[i+1] - range[i]);
1016     }
1017   } else {
1018     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1019   }
1020 
1021   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1022   /* compute lengths of each row  */
1023   for (i=0; i<a->mbs; i++) {
1024     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1025   }
1026   /* store the row lengths to the file */
1027   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1028   if (!rank) {
1029     MPI_Status status;
1030     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1031     rlen = (range[1] - range[0])/bs;
1032     for (i=0; i<rlen; i++) {
1033       for (j=0; j<bs; j++) {
1034         row_lens[i*bs+j] = bs*crow_lens[i];
1035       }
1036     }
1037     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1038     for (i=1; i<size; i++) {
1039       rlen = (range[i+1] - range[i])/bs;
1040       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1041       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1042       for (k=0; k<rlen; k++) {
1043         for (j=0; j<bs; j++) {
1044           row_lens[k*bs+j] = bs*crow_lens[k];
1045         }
1046       }
1047       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1050     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1051   } else {
1052     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1053     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1054     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1055   }
1056   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1057 
1058   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1059      information needed to make it for each row from a block row. This does require more communication but still not more than
1060      the communication needed for the nonzero values  */
1061   nzmax = nz; /*  space a largest processor needs */
1062   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1063   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1064   cnt   = 0;
1065   for (i=0; i<a->mbs; i++) {
1066     pcnt = cnt;
1067     for (j=B->i[i]; j<B->i[i+1]; j++) {
1068       if ((col = garray[B->j[j]]) > cstart) break;
1069       for (l=0; l<bs; l++) {
1070         column_indices[cnt++] = bs*col+l;
1071       }
1072     }
1073     for (k=A->i[i]; k<A->i[i+1]; k++) {
1074       for (l=0; l<bs; l++) {
1075         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1076       }
1077     }
1078     for (; j<B->i[i+1]; j++) {
1079       for (l=0; l<bs; l++) {
1080         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1081       }
1082     }
1083     len = cnt - pcnt;
1084     for (k=1; k<bs; k++) {
1085       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1086       cnt += len;
1087     }
1088   }
1089   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1090 
1091   /* store the columns to the file */
1092   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1093   if (!rank) {
1094     MPI_Status status;
1095     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1096     for (i=1; i<size; i++) {
1097       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1098       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1099       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1100       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1101     }
1102     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1103   } else {
1104     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1105     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1106     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1107     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1108   }
1109   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1110 
1111   /* load up the numerical values */
1112   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1113   cnt  = 0;
1114   for (i=0; i<a->mbs; i++) {
1115     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1116     for (j=B->i[i]; j<B->i[i+1]; j++) {
1117       if (garray[B->j[j]] > cstart) break;
1118       for (l=0; l<bs; l++) {
1119         for (ll=0; ll<bs; ll++) {
1120           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1121         }
1122       }
1123       cnt += bs;
1124     }
1125     for (k=A->i[i]; k<A->i[i+1]; k++) {
1126       for (l=0; l<bs; l++) {
1127         for (ll=0; ll<bs; ll++) {
1128           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1129         }
1130       }
1131       cnt += bs;
1132     }
1133     for (; j<B->i[i+1]; j++) {
1134       for (l=0; l<bs; l++) {
1135         for (ll=0; ll<bs; ll++) {
1136           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1137         }
1138       }
1139       cnt += bs;
1140     }
1141     cnt += (bs-1)*rlen;
1142   }
1143   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1144 
1145   /* store the column values to the file */
1146   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1147   if (!rank) {
1148     MPI_Status status;
1149     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1150     for (i=1; i<size; i++) {
1151       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1152       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1153       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1154       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1155     }
1156     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1157   } else {
1158     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1159     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1160     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1162   }
1163   ierr = PetscFree(column_values);CHKERRQ(ierr);
1164 
1165   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1166   if (file) {
1167     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1173 {
1174   PetscErrorCode ierr;
1175   PetscBool      iascii,isdraw,issocket,isbinary;
1176 
1177   PetscFunctionBegin;
1178   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1179   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1180   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1181   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1182   if (iascii || isdraw || issocket) {
1183     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1184   } else if (isbinary) {
1185     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1186   }
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1191 {
1192   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1198 #endif
1199   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1200   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1201   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1202   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1203 #if defined(PETSC_USE_CTABLE)
1204   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1205 #else
1206   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1207 #endif
1208   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1209   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1210   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1211   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1212   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1213   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1214   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1215   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1216   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1217   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1218   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1219   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1220   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1221   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1222   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1223 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1224   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1227   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1228   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1229   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1230 
1231   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1232   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1233   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1234   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1235 #if defined(PETSC_HAVE_ELEMENTAL)
1236   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1237 #endif
1238   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1239   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1240   PetscFunctionReturn(0);
1241 }
1242 
/*
   yy = A*xx for a Hermitian MPISBAIJ matrix (only the upper triangle is
   stored).  The lower-triangular contribution is obtained from B^H * x and
   combined via the sMvctx scatter; the VecScatterBegin/End pair must bracket
   exactly the operations shown.  Collective on A.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: B^H * x supplies the contributions of the unstored lower triangle */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 (head of slvec0; slvec0b aliases its tail) */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  /* NOTE(review): copies bs*mbs entries using sizeof(MatScalar) although the
     arrays are PetscScalar; the two types coincide except under
     PETSC_USE_REAL_MAT_SINGLE — confirm intended */
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  /* accumulate local and remote contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1276 
1277 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1278 {
1279   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1280   PetscErrorCode    ierr;
1281   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1282   PetscScalar       *from;
1283   const PetscScalar *x;
1284 
1285   PetscFunctionBegin;
1286   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1287   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1288 
1289   /* diagonal part */
1290   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1291   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1292 
1293   /* subdiagonal part */
1294   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1295 
1296   /* copy x into the vec slvec0 */
1297   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1298   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1299 
1300   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1301   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1302   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1303 
1304   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   /* supperdiagonal part */
1307   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       nt;
1316 
1317   PetscFunctionBegin;
1318   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1319   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1320 
1321   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1322   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1323 
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   /* do diagonal part */
1326   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1327   /* do supperdiagonal part */
1328   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1329   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1330   /* do subdiagonal part */
1331   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1332   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
/*
   zz = yy + A*xx for a symmetric MPISBAIJ matrix.  Same structure as
   MatMult_MPISBAIJ, except yy is folded into the diagonal-block product.
   The VecScatterBegin/End pair must bracket exactly the operations shown.
   Collective on A.
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part: slvec1a = A_loc*x + y */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part: B^T * x supplies the unstored lower triangle */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 (head of slvec0; slvec0b aliases its tail) */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  /* NOTE(review): uses sizeof(MatScalar) on PetscScalar data — the types
     coincide except under PETSC_USE_REAL_MAT_SINGLE; confirm intended */
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* merge local and remote contributions into slvec1 */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*slvec1b + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1371 
/*
   zz = yy + A*xx for MPISBAIJ using two communications (forward scatter for
   the super-diagonal part, reverse ADD_VALUES scatter for the transpose of
   the off-diagonal block).  Collective on A.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part (overlapped with the forward scatter above) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do supperdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part: scatter-add B^T*x back to the owning ranks */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1397 {
1398   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1403   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1414   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
/*
   Returns one (locally owned) global row of an MPISBAIJ matrix in point
   (non-block) form, merging the local A and B rows into globally-sorted
   column order.  Must be paired with MatRestoreRow_MPISBAIJ().
   Only the stored entries are returned (upper triangle for A).
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    /* max block entries per block row -> max*bs2 point entries per point row */
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* pass NULL sub-pointers for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1496 
1497 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1498 {
1499   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1500 
1501   PetscFunctionBegin;
1502   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1503   baij->getrowactive = PETSC_FALSE;
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1508 {
1509   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1510   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1511 
1512   PetscFunctionBegin;
1513   aA->getrow_utriangular = PETSC_TRUE;
1514   PetscFunctionReturn(0);
1515 }
1516 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1517 {
1518   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1519   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1520 
1521   PetscFunctionBegin;
1522   aA->getrow_utriangular = PETSC_FALSE;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1527 {
1528   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1533   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1538 {
1539   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1544   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1549    Input: isrow       - distributed(parallel),
1550           iscol_local - locally owned (seq)
1551 */
1552 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1553 {
1554   PetscErrorCode ierr;
1555   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1556   const PetscInt *ptr1,*ptr2;
1557 
1558   PetscFunctionBegin;
1559   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1560   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1561   if (sz1 > sz2) {
1562     *flg = PETSC_FALSE;
1563     PetscFunctionReturn(0);
1564   }
1565 
1566   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1567   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1568 
1569   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1570   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1571   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1572   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1573   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1574   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1575 
1576   nmatch=0;
1577   k     = 0;
1578   for (i=0; i<sz1; i++){
1579     for (j=k; j<sz2; j++){
1580       if (a1[i] == a2[j]) {
1581         k = j; nmatch++;
1582         break;
1583       }
1584     }
1585   }
1586   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1587   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1588   ierr = PetscFree(a1);CHKERRQ(ierr);
1589   ierr = PetscFree(a2);CHKERRQ(ierr);
1590   if (nmatch < sz1) {
1591     *flg = PETSC_FALSE;
1592   } else {
1593     *flg = PETSC_TRUE;
1594   }
1595   PetscFunctionReturn(0);
1596 }
1597 
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;   /* all requested columns gathered onto this process */
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  /* Extract a submatrix of a symmetric (SBAIJ) matrix. The row set must be a
     subset of the gathered column set (checked by ISEqual_private below) so the
     result can legitimately be treated as symmetric. */
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the gathered column IS stashed on the submatrix by the MAT_INITIAL_MATRIX call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* Compose iscol_local on the new matrix for later reuse; PetscObjectCompose()
       takes a reference, so the ISDestroy() below only drops our local reference. */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1624 
1625 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1626 {
1627   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1628   PetscErrorCode ierr;
1629 
1630   PetscFunctionBegin;
1631   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1632   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1633   PetscFunctionReturn(0);
1634 }
1635 
1636 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1637 {
1638   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1639   Mat            A  = a->A,B = a->B;
1640   PetscErrorCode ierr;
1641   PetscReal      isend[5],irecv[5];
1642 
1643   PetscFunctionBegin;
1644   info->block_size = (PetscReal)matin->rmap->bs;
1645 
1646   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1647 
1648   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1649   isend[3] = info->memory;  isend[4] = info->mallocs;
1650 
1651   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1652 
1653   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1654   isend[3] += info->memory;  isend[4] += info->mallocs;
1655   if (flag == MAT_LOCAL) {
1656     info->nz_used      = isend[0];
1657     info->nz_allocated = isend[1];
1658     info->nz_unneeded  = isend[2];
1659     info->memory       = isend[3];
1660     info->mallocs      = isend[4];
1661   } else if (flag == MAT_GLOBAL_MAX) {
1662     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1663 
1664     info->nz_used      = irecv[0];
1665     info->nz_allocated = irecv[1];
1666     info->nz_unneeded  = irecv[2];
1667     info->memory       = irecv[3];
1668     info->mallocs      = irecv[4];
1669   } else if (flag == MAT_GLOBAL_SUM) {
1670     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1671 
1672     info->nz_used      = irecv[0];
1673     info->nz_allocated = irecv[1];
1674     info->nz_unneeded  = irecv[2];
1675     info->memory       = irecv[3];
1676     info->mallocs      = irecv[4];
1677   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1678   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1679   info->fill_ratio_needed = 0;
1680   info->factor_mallocs    = 0;
1681   PetscFunctionReturn(0);
1682 }
1683 
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded verbatim to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record the orientation at the parallel layer as well as in the blocks */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, entries destined for other processes are not stashed/communicated */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* complex Hermitian matrices need the conjugating MatMult variant */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
#endif
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      /* SPD implies both numerical and structural symmetry */
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ storage is symmetric by construction; symmetry cannot be revoked */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this sets the same ignore_ltriangular flag as
       MAT_IGNORE_LOWER_TRIANGULAR -- confirm that is intended and not a
       copy/paste of the previous case */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1762 
1763 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   if (reuse == MAT_INITIAL_MATRIX) {
1769     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1770   }  else if (reuse == MAT_REUSE_MATRIX) {
1771     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* Symmetric storage is only preserved under a symmetric scaling D*A*D, so the
     left and right scaling vectors must be the same vector (or numerically equal). */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating the ghost values of rr needed by the off-diagonal block */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part (overlapped with the scatter in flight) */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1810 
1811 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1812 {
1813   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1814   PetscErrorCode ierr;
1815 
1816   PetscFunctionBegin;
1817   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1822 
1823 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1824 {
1825   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1826   Mat            a,b,c,d;
1827   PetscBool      flg;
1828   PetscErrorCode ierr;
1829 
1830   PetscFunctionBegin;
1831   a = matA->A; b = matA->B;
1832   c = matB->A; d = matB->B;
1833 
1834   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1835   if (flg) {
1836     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1837   }
1838   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1843 {
1844   PetscErrorCode ierr;
1845   PetscBool      isbaij;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1849   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1850   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1851   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1852     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1853     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1854     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1855   } else {
1856     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1857     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1858 
1859     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1860     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1861   }
1862   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Default setup: preallocate using the matrix's block size and
     PETSC_DEFAULT per-row nonzero estimates for both blocks. */
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1874 
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  /* Y = a*X + Y */
  if (str == SAME_NONZERO_PATTERN) {
    /* identical layouts: apply BLAS axpy directly to the raw value arrays of both blocks */
    PetscScalar alpha = a;
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    /* generic row-based AXPY; expose the full upper-triangular rows of X while it runs */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    /* patterns differ: build a new matrix preallocated for the union pattern,
       compute the sum into it, then replace Y's contents with it */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    /* per-block-row nonzero counts for the diagonal and off-diagonal parts of the union */
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    /* MatHeaderReplace() moves B's content into Y and disposes of the B shell */
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1924 
PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      /* Row and column sets differ, so this extracted block is not symmetric;
         presumably MatSeqSBAIJZeroOps_Private() disables the SBAIJ operations
         that rely on symmetry -- see its definition to confirm. */
      ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
1941 
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  /* Y += a*I. Make sure room exists for the diagonal entries before shifting. */
  if (!Y->preallocated) {
    /* no preallocation yet: reserve one (diagonal) block per block row */
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but the diagonal block holds no nonzeros: re-preallocate just
       that block, preserving its new-nonzero policy (nonew) across the call */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1959 
1960 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1961 {
1962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1967   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1968   if (d) {
1969     PetscInt rstart;
1970     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1971     *d += rstart/A->rmap->bs;
1972 
1973   }
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1978 {
1979   PetscFunctionBegin;
1980   *a = ((Mat_MPISBAIJ*)A->data)->A;
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 /* -------------------------------------------------------------------*/
1985 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1986                                        MatGetRow_MPISBAIJ,
1987                                        MatRestoreRow_MPISBAIJ,
1988                                        MatMult_MPISBAIJ,
1989                                /*  4*/ MatMultAdd_MPISBAIJ,
1990                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1991                                        MatMultAdd_MPISBAIJ,
1992                                        0,
1993                                        0,
1994                                        0,
1995                                /* 10*/ 0,
1996                                        0,
1997                                        0,
1998                                        MatSOR_MPISBAIJ,
1999                                        MatTranspose_MPISBAIJ,
2000                                /* 15*/ MatGetInfo_MPISBAIJ,
2001                                        MatEqual_MPISBAIJ,
2002                                        MatGetDiagonal_MPISBAIJ,
2003                                        MatDiagonalScale_MPISBAIJ,
2004                                        MatNorm_MPISBAIJ,
2005                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
2006                                        MatAssemblyEnd_MPISBAIJ,
2007                                        MatSetOption_MPISBAIJ,
2008                                        MatZeroEntries_MPISBAIJ,
2009                                /* 24*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /* 29*/ MatSetUp_MPISBAIJ,
2015                                        0,
2016                                        0,
2017                                        MatGetDiagonalBlock_MPISBAIJ,
2018                                        0,
2019                                /* 34*/ MatDuplicate_MPISBAIJ,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /* 39*/ MatAXPY_MPISBAIJ,
2025                                        MatCreateSubMatrices_MPISBAIJ,
2026                                        MatIncreaseOverlap_MPISBAIJ,
2027                                        MatGetValues_MPISBAIJ,
2028                                        MatCopy_MPISBAIJ,
2029                                /* 44*/ 0,
2030                                        MatScale_MPISBAIJ,
2031                                        MatShift_MPISBAIJ,
2032                                        0,
2033                                        0,
2034                                /* 49*/ 0,
2035                                        0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                /* 54*/ 0,
2040                                        0,
2041                                        MatSetUnfactored_MPISBAIJ,
2042                                        0,
2043                                        MatSetValuesBlocked_MPISBAIJ,
2044                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
2045                                        0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                /* 64*/ 0,
2050                                        0,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2055                                        0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                /* 74*/ 0,
2060                                        0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                /* 79*/ 0,
2065                                        0,
2066                                        0,
2067                                        0,
2068                                        MatLoad_MPISBAIJ,
2069                                /* 84*/ 0,
2070                                        0,
2071                                        0,
2072                                        0,
2073                                        0,
2074                                /* 89*/ 0,
2075                                        0,
2076                                        0,
2077                                        0,
2078                                        0,
2079                                /* 94*/ 0,
2080                                        0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                /* 99*/ 0,
2085                                        0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                /*104*/ 0,
2090                                        MatRealPart_MPISBAIJ,
2091                                        MatImaginaryPart_MPISBAIJ,
2092                                        MatGetRowUpperTriangular_MPISBAIJ,
2093                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2094                                /*109*/ 0,
2095                                        0,
2096                                        0,
2097                                        0,
2098                                        MatMissingDiagonal_MPISBAIJ,
2099                                /*114*/ 0,
2100                                        0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                /*119*/ 0,
2105                                        0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                /*124*/ 0,
2110                                        0,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                /*129*/ 0,
2115                                        0,
2116                                        0,
2117                                        0,
2118                                        0,
2119                                /*134*/ 0,
2120                                        0,
2121                                        0,
2122                                        0,
2123                                        0,
2124                                /*139*/ MatSetBlockSizes_Default,
2125                                        0,
2126                                        0,
2127                                        0,
2128                                        0,
2129                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2130 };
2131 
2132 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2133 {
2134   Mat_MPISBAIJ   *b;
2135   PetscErrorCode ierr;
2136   PetscInt       i,mbs,Mbs;
2137   PetscMPIInt    size;
2138 
2139   PetscFunctionBegin;
2140   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2141   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2142   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2143   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2144 
2145   b   = (Mat_MPISBAIJ*)B->data;
2146   mbs = B->rmap->n/bs;
2147   Mbs = B->rmap->N/bs;
2148   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2149 
2150   B->rmap->bs = bs;
2151   b->bs2      = bs*bs;
2152   b->mbs      = mbs;
2153   b->Mbs      = Mbs;
2154   b->nbs      = B->cmap->n/bs;
2155   b->Nbs      = B->cmap->N/bs;
2156 
2157   for (i=0; i<=b->size; i++) {
2158     b->rangebs[i] = B->rmap->range[i]/bs;
2159   }
2160   b->rstartbs = B->rmap->rstart/bs;
2161   b->rendbs   = B->rmap->rend/bs;
2162 
2163   b->cstartbs = B->cmap->rstart/bs;
2164   b->cendbs   = B->cmap->rend/bs;
2165 
2166 #if defined(PETSC_USE_CTABLE)
2167   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2168 #else
2169   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2170 #endif
2171   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2172   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2173   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2174   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2175   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2176   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2177   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2178   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2179   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2180 
2181   /* Because the B will have been resized we simply destroy it and create a new one each time */
2182   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2183   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2184   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2185   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2186   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2188 
2189   if (!B->preallocated) {
2190     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2191     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2192     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2193     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2194     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2195   }
2196 
2197   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2198   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2199 
2200   B->preallocated  = PETSC_TRUE;
2201   B->was_assembled = PETSC_FALSE;
2202   B->assembled     = PETSC_FALSE;
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2207 {
2208   PetscInt       m,rstart,cstart,cend;
2209   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2210   const PetscInt *JJ    =0;
2211   PetscScalar    *values=0;
2212   PetscErrorCode ierr;
2213 
2214   PetscFunctionBegin;
2215   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2216   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2217   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2218   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2219   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2220   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2221   m      = B->rmap->n/bs;
2222   rstart = B->rmap->rstart/bs;
2223   cstart = B->cmap->rstart/bs;
2224   cend   = B->cmap->rend/bs;
2225 
2226   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2227   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2228   for (i=0; i<m; i++) {
2229     nz = ii[i+1] - ii[i];
2230     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2231     nz_max = PetscMax(nz_max,nz);
2232     JJ     = jj + ii[i];
2233     for (j=0; j<nz; j++) {
2234       if (*JJ >= cstart) break;
2235       JJ++;
2236     }
2237     d = 0;
2238     for (; j<nz; j++) {
2239       if (*JJ++ >= cend) break;
2240       d++;
2241     }
2242     d_nnz[i] = d;
2243     o_nnz[i] = nz - d;
2244   }
2245   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2246   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2247 
2248   values = (PetscScalar*)V;
2249   if (!values) {
2250     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2251     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2252   }
2253   for (i=0; i<m; i++) {
2254     PetscInt          row    = i + rstart;
2255     PetscInt          ncols  = ii[i+1] - ii[i];
2256     const PetscInt    *icols = jj + ii[i];
2257     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2258     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2259   }
2260 
2261   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2262   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2263   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2264   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 /*MC
2269    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2270    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2271    the matrix is stored.
2272 
2273   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2274   can call MatSetOption(Mat, MAT_HERMITIAN);
2275 
2276    Options Database Keys:
2277 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2278 
2279   Level: beginner
2280 
2281 .seealso: MatCreateMPISBAIJ
2282 M*/
2283 
2284 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2285 {
2286   Mat_MPISBAIJ   *b;
2287   PetscErrorCode ierr;
2288   PetscBool      flg = PETSC_FALSE;
2289 
2290   PetscFunctionBegin;
2291   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2292   B->data = (void*)b;
2293   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2294 
2295   B->ops->destroy = MatDestroy_MPISBAIJ;
2296   B->ops->view    = MatView_MPISBAIJ;
2297   B->assembled    = PETSC_FALSE;
2298   B->insertmode   = NOT_SET_VALUES;
2299 
2300   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2301   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2302 
2303   /* build local table of row and column ownerships */
2304   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2305 
2306   /* build cache for off array entries formed */
2307   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2308 
2309   b->donotstash  = PETSC_FALSE;
2310   b->colmap      = NULL;
2311   b->garray      = NULL;
2312   b->roworiented = PETSC_TRUE;
2313 
2314   /* stuff used in block assembly */
2315   b->barray = 0;
2316 
2317   /* stuff used for matrix vector multiply */
2318   b->lvec    = 0;
2319   b->Mvctx   = 0;
2320   b->slvec0  = 0;
2321   b->slvec0b = 0;
2322   b->slvec1  = 0;
2323   b->slvec1a = 0;
2324   b->slvec1b = 0;
2325   b->sMvctx  = 0;
2326 
2327   /* stuff for MatGetRow() */
2328   b->rowindices   = 0;
2329   b->rowvalues    = 0;
2330   b->getrowactive = PETSC_FALSE;
2331 
2332   /* hash table stuff */
2333   b->ht           = 0;
2334   b->hd           = 0;
2335   b->ht_size      = 0;
2336   b->ht_flag      = PETSC_FALSE;
2337   b->ht_fact      = 0;
2338   b->ht_total_ct  = 0;
2339   b->ht_insert_ct = 0;
2340 
2341   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2342   b->ijonly = PETSC_FALSE;
2343 
2344   b->in_loc = 0;
2345   b->v_loc  = 0;
2346   b->n_loc  = 0;
2347 
2348   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2350   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2351   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2352 #if defined(PETSC_HAVE_ELEMENTAL)
2353   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2354 #endif
2355   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2356   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_XAIJ);CHKERRQ(ierr);
2357 
2358   B->symmetric                  = PETSC_TRUE;
2359   B->structurally_symmetric     = PETSC_TRUE;
2360   B->symmetric_set              = PETSC_TRUE;
2361   B->structurally_symmetric_set = PETSC_TRUE;
2362   B->symmetric_eternal          = PETSC_TRUE;
2363 
2364   B->hermitian                  = PETSC_FALSE;
2365   B->hermitian_set              = PETSC_FALSE;
2366 
2367   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2368   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2369   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2370   if (flg) {
2371     PetscReal fact = 1.39;
2372     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2373     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2374     if (fact <= 1.0) fact = 1.39;
2375     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2376     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2377   }
2378   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 /*MC
2383    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2384 
2385    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2386    and MATMPISBAIJ otherwise.
2387 
2388    Options Database Keys:
2389 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2390 
2391   Level: beginner
2392 
2393 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2394 M*/
2395 
2396 /*@C
2397    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2398    the user should preallocate the matrix storage by setting the parameters
2399    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2400    performance can be increased by more than a factor of 50.
2401 
2402    Collective on Mat
2403 
2404    Input Parameters:
2405 +  B - the matrix
2406 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2407           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2408 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2409            submatrix  (same for all local rows)
2410 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2412            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2413            for the diagonal entry and set a value even if it is zero.
2414 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2415            submatrix (same for all local rows).
2416 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2417            off-diagonal portion of the local submatrix that is right of the diagonal
2418            (possibly different for each block row) or NULL.
2419 
2420 
2421    Options Database Keys:
2422 .   -mat_no_unroll - uses code that does not unroll the loops in the
2423                      block calculations (much slower)
2424 .   -mat_block_size - size of the blocks to use
2425 
2426    Notes:
2427 
2428    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2429    than it must be used on all processors that share the object for that argument.
2430 
2431    If the *_nnz parameter is given then the *_nz parameter is ignored
2432 
2433    Storage Information:
2434    For a square global matrix we define each processor's diagonal portion
2435    to be its local rows and the corresponding columns (a square submatrix);
2436    each processor's off-diagonal portion encompasses the remainder of the
2437    local matrix (a rectangular submatrix).
2438 
2439    The user can specify preallocated storage for the diagonal part of
2440    the local submatrix with either d_nz or d_nnz (not both).  Set
2441    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2442    memory allocation.  Likewise, specify preallocated storage for the
2443    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2444 
2445    You can call MatGetInfo() to get information on how effective the preallocation was;
2446    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2447    You can also run with the option -info and look for messages with the string
2448    malloc in them to see if additional memory allocation was needed.
2449 
2450    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2451    the figure below we depict these three local rows and all columns (0-11).
2452 
2453 .vb
2454            0 1 2 3 4 5 6 7 8 9 10 11
2455           --------------------------
2456    row 3  |. . . d d d o o o o  o  o
2457    row 4  |. . . d d d o o o o  o  o
2458    row 5  |. . . d d d o o o o  o  o
2459           --------------------------
2460 .ve
2461 
2462    Thus, any entries in the d locations are stored in the d (diagonal)
2463    submatrix, and any entries in the o locations are stored in the
2464    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2465    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2466 
2467    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2468    plus the diagonal part of the d matrix,
2469    and o_nz should indicate the number of block nonzeros per row in the o matrix
2470 
2471    In general, for PDE problems in which most nonzeros are near the diagonal,
2472    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2473    or you will get TERRIBLE performance; see the users' manual chapter on
2474    matrices.
2475 
2476    Level: intermediate
2477 
2478 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2479 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation composed as
     "MatMPISBAIJSetPreallocation_C" (MatMPISBAIJSetPreallocation_MPISBAIJ for
     MATMPISBAIJ); PetscTryMethod() is a no-op for types that do not compose it. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2491 
2492 /*@C
2493    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2494    (block compressed row).  For good matrix assembly performance
2495    the user should preallocate the matrix storage by setting the parameters
2496    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2497    performance can be increased by more than a factor of 50.
2498 
2499    Collective on MPI_Comm
2500 
2501    Input Parameters:
2502 +  comm - MPI communicator
2503 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2504           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2505 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2506            This value should be the same as the local size used in creating the
2507            y vector for the matrix-vector product y = Ax.
2508 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2509            This value should be the same as the local size used in creating the
2510            x vector for the matrix-vector product y = Ax.
2511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2513 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2514            submatrix  (same for all local rows)
2515 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
2518            If you plan to factor the matrix you must leave room for the diagonal entry and
2519            set its value even if it is zero.
2520 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2521            submatrix (same for all local rows).
2522 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2523            off-diagonal portion of the local submatrix (possibly different for
2524            each block row) or NULL.
2525 
2526    Output Parameter:
2527 .  A - the matrix
2528 
2529    Options Database Keys:
2530 .   -mat_no_unroll - uses code that does not unroll the loops in the
2531                      block calculations (much slower)
2532 .   -mat_block_size - size of the blocks to use
2533 .   -mat_mpi - use the parallel matrix data structures even on one processor
2534                (defaults to using SeqBAIJ format on one processor)
2535 
2536    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
2538    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2539 
2540    Notes:
2541    The number of rows and columns must be divisible by blocksize.
2542    This matrix type does not support complex Hermitian operation.
2543 
2544    The user MUST specify either the local or global matrix dimensions
2545    (possibly both).
2546 
2547    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2548    than it must be used on all processors that share the object for that argument.
2549 
2550    If the *_nnz parameter is given then the *_nz parameter is ignored
2551 
2552    Storage Information:
2553    For a square global matrix we define each processor's diagonal portion
2554    to be its local rows and the corresponding columns (a square submatrix);
2555    each processor's off-diagonal portion encompasses the remainder of the
2556    local matrix (a rectangular submatrix).
2557 
2558    The user can specify preallocated storage for the diagonal part of
2559    the local submatrix with either d_nz or d_nnz (not both).  Set
2560    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2561    memory allocation.  Likewise, specify preallocated storage for the
2562    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2563 
2564    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2565    the figure below we depict these three local rows and all columns (0-11).
2566 
2567 .vb
2568            0 1 2 3 4 5 6 7 8 9 10 11
2569           --------------------------
2570    row 3  |. . . d d d o o o o  o  o
2571    row 4  |. . . d d d o o o o  o  o
2572    row 5  |. . . d d d o o o o  o  o
2573           --------------------------
2574 .ve
2575 
2576    Thus, any entries in the d locations are stored in the d (diagonal)
2577    submatrix, and any entries in the o locations are stored in the
2578    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2579    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2580 
2581    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2582    plus the diagonal part of the d matrix,
2583    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2584    In general, for PDE problems in which most nonzeros are near the diagonal,
2585    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2586    or you will get TERRIBLE performance; see the users' manual chapter on
2587    matrices.
2588 
2589    Level: intermediate
2590 
2591 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2592 @*/
2593 
2594 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2595 {
2596   PetscErrorCode ierr;
2597   PetscMPIInt    size;
2598 
2599   PetscFunctionBegin;
2600   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2601   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2602   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2603   if (size > 1) {
2604     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2605     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2606   } else {
2607     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2608     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 
/*
  MatDuplicate_MPISBAIJ - Duplicates an MPISBAIJ matrix, copying all sizing and
  assembly state and duplicating (or value-copying, per cpvalues) the diagonal
  (A) and off-diagonal (B) sequential blocks, the communication vectors, and
  the scatters.
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* create an empty matrix of the same type/sizes sharing the layouts */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar fields of the implementation data */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;   /* MatGetRow() work space is not copied */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the configuration, reset the per-matrix state */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  /* deep-copy the global-to-local column map, if present */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* copy the compacted global column indices of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the arrays of slvec1/slvec0:
     the first bs*mbs entries are the local part, the remainder the ghost part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* NOTE(review): slvec0/slvec1 were already logged as parents above; these two
     calls look redundant (but harmless) — confirm before removing */
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* duplicate the sequential diagonal (A) and off-diagonal (B) blocks */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2720 
2721 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2722 {
2723   PetscErrorCode ierr;
2724   PetscInt       i,nz,j,rstart,rend;
2725   PetscScalar    *vals,*buf;
2726   MPI_Comm       comm;
2727   MPI_Status     status;
2728   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2729   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2730   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2731   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2732   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2733   PetscInt       dcount,kmax,k,nzcount,tmp;
2734   int            fd;
2735   PetscBool      isbinary;
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2739   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
2740 
2741   /* force binary viewer to load .info file if it has not yet done so */
2742   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2743   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2744   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2745   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2746   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2747   if (bs < 0) bs = 1;
2748 
2749   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2750   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2751   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2752   if (!rank) {
2753     ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
2754     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2755     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2756   }
2757 
2758   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2759   M    = header[1];
2760   N    = header[2];
2761 
2762   /* If global sizes are set, check if they are consistent with that given in the file */
2763   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2764   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2765 
2766   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2767 
2768   /*
2769      This code adds extra rows to make sure the number of rows is
2770      divisible by the blocksize
2771   */
2772   Mbs        = M/bs;
2773   extra_rows = bs - M + bs*(Mbs);
2774   if (extra_rows == bs) extra_rows = 0;
2775   else                  Mbs++;
2776   if (extra_rows &&!rank) {
2777     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2778   }
2779 
2780   /* determine ownership of all rows */
2781   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2782     mbs = Mbs/size + ((Mbs % size) > rank);
2783     m   = mbs*bs;
2784   } else { /* User Set */
2785     m   = newmat->rmap->n;
2786     mbs = m/bs;
2787   }
2788   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2789   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2790   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2791   rowners[0] = 0;
2792   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2793   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2794   rstart = rowners[rank];
2795   rend   = rowners[rank+1];
2796 
2797   /* distribute row lengths to all processors */
2798   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2799   if (!rank) {
2800     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2801     ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
2802     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2803     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2804     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2805     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2806     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2807   } else {
2808     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2809   }
2810 
2811   if (!rank) {   /* procs[0] */
2812     /* calculate the number of nonzeros on each processor */
2813     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2814     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2815     for (i=0; i<size; i++) {
2816       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2817         procsnz[i] += rowlengths[j];
2818       }
2819     }
2820     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2821 
2822     /* determine max buffer needed and allocate it */
2823     maxnz = 0;
2824     for (i=0; i<size; i++) {
2825       maxnz = PetscMax(maxnz,procsnz[i]);
2826     }
2827     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2828 
2829     /* read in my part of the matrix column indices  */
2830     nz     = procsnz[0];
2831     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2832     mycols = ibuf;
2833     if (size == 1) nz -= extra_rows;
2834     ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2835     if (size == 1) {
2836       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2837     }
2838 
2839     /* read in every ones (except the last) and ship off */
2840     for (i=1; i<size-1; i++) {
2841       nz   = procsnz[i];
2842       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2843       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2844     }
2845     /* read in the stuff for the last proc */
2846     if (size != 1) {
2847       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2848       ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
2849       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2850       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2851     }
2852     ierr = PetscFree(cols);CHKERRQ(ierr);
2853   } else {  /* procs[i], i>0 */
2854     /* determine buffer space needed for message */
2855     nz = 0;
2856     for (i=0; i<m; i++) nz += locrowlens[i];
2857     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2858     mycols = ibuf;
2859     /* receive message of column indices*/
2860     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2861     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2862     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2863   }
2864 
2865   /* loop over local rows, determining number of off diagonal entries */
2866   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2867   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2868   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2869   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2870   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2871   rowcount = 0;
2872   nzcount  = 0;
2873   for (i=0; i<mbs; i++) {
2874     dcount  = 0;
2875     odcount = 0;
2876     for (j=0; j<bs; j++) {
2877       kmax = locrowlens[rowcount];
2878       for (k=0; k<kmax; k++) {
2879         tmp = mycols[nzcount++]/bs; /* block col. index */
2880         if (!mask[tmp]) {
2881           mask[tmp] = 1;
2882           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2883           else masked1[dcount++] = tmp; /* entry in diag portion */
2884         }
2885       }
2886       rowcount++;
2887     }
2888 
2889     dlens[i]  = dcount;  /* d_nzz[i] */
2890     odlens[i] = odcount; /* o_nzz[i] */
2891 
2892     /* zero out the mask elements we set */
2893     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2894     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2895   }
2896   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2897   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2898   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2899 
2900   if (!rank) {
2901     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2902     /* read in my part of the matrix numerical values  */
2903     nz     = procsnz[0];
2904     vals   = buf;
2905     mycols = ibuf;
2906     if (size == 1) nz -= extra_rows;
2907     ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2908     if (size == 1) {
2909       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2910     }
2911 
2912     /* insert into matrix */
2913     jj = rstart*bs;
2914     for (i=0; i<m; i++) {
2915       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2916       mycols += locrowlens[i];
2917       vals   += locrowlens[i];
2918       jj++;
2919     }
2920 
2921     /* read in other processors (except the last one) and ship out */
2922     for (i=1; i<size-1; i++) {
2923       nz   = procsnz[i];
2924       vals = buf;
2925       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2926       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2927     }
2928     /* the last proc */
2929     if (size != 1) {
2930       nz   = procsnz[i] - extra_rows;
2931       vals = buf;
2932       ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
2933       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2934       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2935     }
2936     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2937 
2938   } else {
2939     /* receive numeric values */
2940     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2941 
2942     /* receive message of values*/
2943     vals   = buf;
2944     mycols = ibuf;
2945     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2946     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2947     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2948 
2949     /* insert into matrix */
2950     jj = rstart*bs;
2951     for (i=0; i<m; i++) {
2952       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2953       mycols += locrowlens[i];
2954       vals   += locrowlens[i];
2955       jj++;
2956     }
2957   }
2958 
2959   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2960   ierr = PetscFree(buf);CHKERRQ(ierr);
2961   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2962   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2963   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2964   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2965   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 /*XXXXX@
2971    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2972 
2973    Input Parameters:
2974 .  mat  - the matrix
2975 .  fact - factor
2976 
2977    Not Collective on Mat, each process can have a different hash factor
2978 
2979    Level: advanced
2980 
2981   Notes:
2982    This can also be set by the command line option: -mat_use_hash_table fact
2983 
2984 .seealso: MatSetOption()
2985 @XXXXX*/
2986 
2987 
/*
   MatGetRowMaxAbs_MPISBAIJ - Computes, for each local row, the largest absolute value
   in that row of the parallel symmetric matrix, returning the result in v.

   Because only the upper triangle of each off-diagonal block is stored (in a->B),
   every stored entry (row,col) also represents the symmetric entry (col,row) owned by a
   higher-ranked process.  Each process therefore accumulates column maxima for those
   remote rows in a scratch array ('work') and ships each slice to its owner with MPI_Send;
   owners fold the received slices into their local row maxima.

   Input Parameter:
.  A - the MPISBAIJ matrix

   Output Parameters:
+  v   - vector of per-row maximum absolute values
-  idx - not supported; must be NULL

   Notes: collective; every rank except the last sends, every rank except rank 0 receives.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  /* returning the column index of each row maximum is not implemented */
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* seed v with the row maxima of the local diagonal block A */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;    /* number of local block rows */
  Mbs = a->Mbs;    /* number of global block rows */
  ba  = b->a;      /* values of off-diagonal part B, walked linearly below */
  bi  = b->i;      /* block-row pointers of B */
  bj  = b->j;      /* block-column indices of B */

  /* find ownerships (point-index row ranges, one entry per rank) */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed: column maxima over ALL global rows */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B: the last rank owns no upper-triangular off-diagonal entries, so it skips this */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          col += rowners_bs[rank+1];      /* global col index */
          /* NOTE(review): this assumes B's block-column indices are relative to the first
             column past this rank's diagonal block (i.e. offset by rowners_bs[rank+1]) —
             confirm against how a->B's column map is built for MPISBAIJ */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            /* stored entry updates both its own row max ... */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            /* ... and, by symmetry, the max of the remote row equal to its column */
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners: each higher rank gets the slice of 'work' it owns */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from every lower rank, each covering this rank's full row range;
     'work' is reused as the receive buffer (its accumulation role is finished here) */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values: fold the received column maxima into the local row maxima */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3070 
/*
   MatSOR_MPISBAIJ - SOR/SSOR relaxation for a parallel symmetric (SBAIJ) matrix.

   Supported modes: SOR_APPLY_UPPER, local symmetric sweeps, single local
   forward/backward sweeps with a zero initial guess, and Eisenstat's trick;
   anything else raises PETSC_ERR_SUP.  Only block size 1 is implemented.

   The off-process coupling uses the single-scatter scheme of MPISBAIJ:
   slvec0 = [xx ; B^T*xx] is scattered (ADD_VALUES) into slvec1 = [bb ; 0],
   so that after the scatter slvec1a/slvec1b hold the locally needed parts of
   bb - B^T*xx and the ghost values of xx respectively.

   Input Parameters mirror MatSOR(): rhs bb, relaxation factor omega, sweep
   type flag, diagonal shift fshift, global iterations its, local iterations lits.
   Output: xx is updated in place.
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  /* upper-triangular application is delegated entirely to the local diagonal block */
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution since xx starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      /* NOTE(review): copies PetscScalar data using sizeof(MatScalar) — these are the same
         type in standard builds, but confirm for mixed-precision configurations */
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate so the scatter below accumulates bb - B^T*xx (B^T part) and -xx (ghost part) */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* overlap the scatter with releasing the array handles */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* single purely-local forward sweep: no communication required */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    /* single purely-local backward sweep: no communication required */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* work vectors are created lazily and cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): MatMultDiagonalBlock writes into bb1 but the AYPX below updates
         slvec1a — these look mismatched compared with the hand-rolled branch below,
         which computes slvec1a = bb + scale*diag.*xx; confirm intended semantics */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so the product simplifies to a subtraction */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3202 
/*
   MatSOR_MPISBAIJ_2comm - Variant of MatSOR for MPISBAIJ using two separate
   scatters (a forward scatter for ghost values of xx and a reverse scatter for
   the B^T contribution) instead of the combined slvec0/slvec1 scheme used by
   MatSOR_MPISBAIJ.

   Only the local symmetric sweep is supported, and only block size 1.
   Scatter begins/ends are deliberately interleaved with local computation
   to hide communication latency — do not reorder.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero initial guess the first iteration has no off-process coupling */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start gathering the ghost values of xx needed for the B*x product below */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter accumulates -B^T*xx contributions onto their owning ranks */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
3246 
3247 /*@
3248      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3249          CSR format the local rows.
3250 
3251    Collective on MPI_Comm
3252 
3253    Input Parameters:
3254 +  comm - MPI communicator
3255 .  bs - the block size, only a block size of 1 is supported
3256 .  m - number of local rows (Cannot be PETSC_DECIDE)
3257 .  n - This value should be the same as the local size used in creating the
3258        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3259        calculated if N is given) For square matrices n is almost always m.
3260 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3261 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3262 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
3263 .   j - column indices
3264 -   a - matrix values
3265 
3266    Output Parameter:
3267 .   mat - the matrix
3268 
3269    Level: intermediate
3270 
3271    Notes:
3272        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3273      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3274      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3275 
3276        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3277 
3278 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3279           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3280 @*/
3281 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3282 {
3283   PetscErrorCode ierr;
3284 
3285 
3286   PetscFunctionBegin;
3287   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3288   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3289   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3290   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3291   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3292   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3293   PetscFunctionReturn(0);
3294 }
3295 
3296 
3297 /*@C
3298    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3299    (the default parallel PETSc format).
3300 
3301    Collective on MPI_Comm
3302 
3303    Input Parameters:
3304 +  B - the matrix
3305 .  bs - the block size
3306 .  i - the indices into j for the start of each local row (starts with zero)
3307 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3308 -  v - optional values in the matrix
3309 
3310    Level: developer
3311 
3312 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3313 @*/
3314 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3315 {
3316   PetscErrorCode ierr;
3317 
3318   PetscFunctionBegin;
3319   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3324 {
3325   PetscErrorCode ierr;
3326   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3327   PetscInt       *indx;
3328   PetscScalar    *values;
3329 
3330   PetscFunctionBegin;
3331   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3332   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3333     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3334     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3335     PetscInt       *bindx,rmax=a->rmax,j;
3336 
3337     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3338     mbs = m/bs; Nbs = N/cbs;
3339     if (n == PETSC_DECIDE) {
3340       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3341     }
3342     /* Check sum(n) = Nbs */
3343     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3344     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3345 
3346     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3347     rstart -= mbs;
3348 
3349     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3350     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3351     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3352     for (i=0; i<mbs; i++) {
3353       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3354       nnz  = nnz/bs;
3355       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3356       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3357       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3358     }
3359     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3360     ierr = PetscFree(bindx);CHKERRQ(ierr);
3361 
3362     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3363     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3364     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3365     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3366     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3367     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3368   }
3369 
3370   /* numeric phase */
3371   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3372   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3373 
3374   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3375   for (i=0; i<m; i++) {
3376     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3377     Ii   = i + rstart;
3378     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3379     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3380   }
3381   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3382   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3383   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386