xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c5b5d8d5d69aed0b9a5d08ebbb2e85e542e74f68)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #if defined(PETSC_HAVE_SCALAPACK)
11 PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
12 #endif
13 
14 /* This could be moved to matimpl.h */
15 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
16 {
17   Mat            preallocator;
18   PetscInt       r,rstart,rend;
19   PetscInt       bs,i,m,n,M,N;
20   PetscBool      cong = PETSC_TRUE;
21   PetscErrorCode ierr;
22 
23   PetscFunctionBegin;
24   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
25   PetscValidLogicalCollectiveInt(B,nm,2);
26   for (i = 0; i < nm; i++) {
27     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
28     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
29     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
30   }
31   PetscValidLogicalCollectiveBool(B,fill,5);
32   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
33   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
34   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
35   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
36   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
37   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
38   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
39   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
40   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
41   for (r = rstart; r < rend; ++r) {
42     PetscInt          ncols;
43     const PetscInt    *row;
44     const PetscScalar *vals;
45 
46     for (i = 0; i < nm; i++) {
47       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
48       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
49       if (symm && symm[i]) {
50         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
51       }
52       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
53     }
54   }
55   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
56   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
57   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
58   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
59   PetscFunctionReturn(0);
60 }
61 
/*
   Fallback conversion from MPISBAIJ (which stores only the upper triangular
   blocks) to a general matrix type: each stored entry (r,c) is inserted at
   (r,c) and mirrored to (c,r).  With complex scalars the mirrored value is
   conjugated when the matrix is marked Hermitian.

   reuse == MAT_REUSE_MATRIX refills *newmat after zeroing it; otherwise a new
   matrix is created (preallocated from A's symmetrized pattern unless the
   target type is dense).  MAT_INPLACE_MATRIX replaces A's header at the end.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       r;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscBool symm = PETSC_TRUE,isdense;
    PetscInt  bs;

    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(B,newtype);CHKERRQ(ierr);
    ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
    ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
    ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
    if (!isdense) {
      /* sparse targets: preallocate with A's pattern plus its transpose (symm) */
      ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
      ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
    } else {
      /* dense targets need no preallocation */
      ierr = MatSetUp(B);CHKERRQ(ierr);
    }
  } else {
    B    = *newmat;
    ierr = MatZeroEntries(B);CHKERRQ(ierr);
  }

  /* copy values, mirroring each stored upper-triangular entry to the lower triangle */
  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
    PetscInt          ncols;
    const PetscInt    *row;
    const PetscScalar *vals;

    ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
    ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (A->hermitian) {
      PetscInt i;
      /* Hermitian: mirrored entries are conjugated */
      for (i = 0; i < ncols; i++) {
        ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
      }
    } else {
      ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
    }
#else
    ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif
    ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
  }
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}
126 
127 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
128 {
129   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
130   PetscErrorCode ierr;
131 
132   PetscFunctionBegin;
133   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
134   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
139 {
140   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
141   PetscErrorCode ierr;
142 
143   PetscFunctionBegin;
144   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
145   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
/* Inserts or adds one scalar at local (row,col) of the diagonal (SEQSBAIJ)
   block.  Relies on variables declared in the caller: a, aa, ai, aj, aimax,
   ailen, bs, bs2, brow, bcol, ridx, cidx, rp, ap, bap, low, high, t, _i, N,
   nrow, rmax, ierr, and matrix A.  A binary search narrows the block-column
   range, a short linear scan finds (or misses) the block; on a miss a new
   block is allocated (subject to a->nonew) and later entries shifted up.
   orow/ocol are the original global indices, used only in error messages. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }
185 
/* Counterpart of MatSetValues_SeqSBAIJ_A_Private for the off-diagonal
   (SEQBAIJ) block: inserts or adds one scalar at local (row,col) of B,
   using the caller-scoped variables b, ba, bi, bj, bimax, bilen plus the
   shared search/scratch variables.  Same search/insert/shift structure,
   with b->nonew controlling whether new nonzeros may be created. */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
    ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
222 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored or generates an error
   (controlled by a->ignore_ltriangular / -mat_ignore_lower_triangular).
   Locally owned entries go directly into the diagonal (A) or off-diagonal (B)
   sequential block via the macros above; off-process rows are stashed for
   communication during assembly. */
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  /* grow the cached scratch arrays used when stashing off-process rows */
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* map the global column through colmap (built lazily); colmap stores
               compacted block columns shifted by +1 so 0 can mean "absent" */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* column not present in the assembled pattern: fall back to
                 disassembled form and use the global index directly */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        /* filter out lower-triangular entries before stashing the row */
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
339 
/*
   Inserts or adds one bs*bs block of values v at block position (row,col) of
   a SEQSBAIJ matrix.  Lower-triangular blocks (col < row) are skipped or
   raise an error depending on a->ignore_ltriangular.  The row's block columns
   are located by binary search + linear scan; a missing block is created
   (subject to a->nonew) by shifting later entries up.  Row-oriented input is
   transposed into the column-major block storage during the copy.
   orow/ocol are the caller's original block indices, used in error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  if (col < row) {
    if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
    else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
  }
  rp   = aj + ai[row];        /* block columns of this block row */
  ap   = aa + bs2*ai[row];    /* values of this block row */
  rmax = imax[row];           /* allocated block count for this row */
  nrow = ailen[row];          /* used block count for this row */
  value = v;
  low   = 0;
  high  = nrow;

  /* binary search down to a window of <= 7 candidates, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already exists: add or overwrite its bs2 values */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major: stride through the column-major block */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: straight copy */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++ += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=0; jj<bs; jj++) {
              *bap++  = *value++;
            }
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;   /* new nonzeros silently ignored */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
431 
/*
   This routine is exactly duplicated in mpibaij.c

   Inserts or adds one bs*bs block of values v at block position (row,col) of
   a SEQBAIJ matrix (no triangular filtering here, unlike the SBAIJ variant).
   Same search/insert/shift structure as MatSetValuesBlocked_SeqSBAIJ_Inlined;
   row-oriented input is transposed into the column-major block storage.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];        /* block columns of this block row */
  ap   = aa + bs2*ai[row];    /* values of this block row */
  rmax = imax[row];           /* allocated block count for this row */
  nrow = ailen[row];          /* used block count for this row */
  low  = 0;
  high = nrow;
  value = v;
  /* binary search down to a window of <= 7 candidates, then scan linearly */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: add or overwrite its bs2 values */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major: stride through the column-major block */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input already column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;   /* new nonzeros silently ignored */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
523 
/*
    This routine could be optimized by removing the need for the block copy below and passing stride information
  to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()

    Inserts/adds m x n blocks of size bs*bs at block positions (im[],in[]).
    Blocks strictly below the diagonal (im[i] > in[j]) are skipped or raise an
    error depending on ignore_ltriangular.  Locally owned blocks are copied
    (when necessary) into the cached scratch block baij->barray and dispatched
    to the diagonal (A) or off-diagonal (B) inlined routines; off-process rows
    are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block shared by all calls */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive block rows/cols of v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block rows are silently skipped */
    if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs*bs block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
        else {
          if (mat->was_assembled) {
            /* translate the global block column through colmap (entries are
               scalar columns shifted by +1, hence the /bs and -1 below) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column absent from the assembled pattern: disassemble and use
                 the global block index directly */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* stash the whole block row for communication during assembly */
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
627 
/*
   Retrieves the m x n entries at global positions (idxm[],idxn[]) into v
   (row-major).  Only locally owned rows are supported; negative indices are
   skipped.  Columns in the local diagonal range are read from A, others are
   mapped through colmap to the compacted off-diagonal block B; columns not
   present in B's pattern return 0.0.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block: read directly from A with local indices */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate the global block column via colmap
             (stored +1 so 0 can mean "not present") */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* column not in B's pattern => entry is an implicit zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
669 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
670 {
671   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
672   PetscErrorCode ierr;
673   PetscReal      sum[2],*lnorm2;
674 
675   PetscFunctionBegin;
676   if (baij->size == 1) {
677     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
678   } else {
679     if (type == NORM_FROBENIUS) {
680       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
681       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
682       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
683       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
684       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
685       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
686       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
687       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
688     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
689       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
690       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
691       PetscReal    *rsum,*rsum2,vabs;
692       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
693       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
694       MatScalar    *v;
695 
696       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
697       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
698       /* Amat */
699       v = amat->a; jj = amat->j;
700       for (brow=0; brow<mbs; brow++) {
701         grow = bs*(rstart + brow);
702         nz   = amat->i[brow+1] - amat->i[brow];
703         for (bcol=0; bcol<nz; bcol++) {
704           gcol = bs*(rstart + *jj); jj++;
705           for (col=0; col<bs; col++) {
706             for (row=0; row<bs; row++) {
707               vabs            = PetscAbsScalar(*v); v++;
708               rsum[gcol+col] += vabs;
709               /* non-diagonal block */
710               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
711             }
712           }
713         }
714         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
715       }
716       /* Bmat */
717       v = bmat->a; jj = bmat->j;
718       for (brow=0; brow<mbs; brow++) {
719         grow = bs*(rstart + brow);
720         nz = bmat->i[brow+1] - bmat->i[brow];
721         for (bcol=0; bcol<nz; bcol++) {
722           gcol = bs*garray[*jj]; jj++;
723           for (col=0; col<bs; col++) {
724             for (row=0; row<bs; row++) {
725               vabs            = PetscAbsScalar(*v); v++;
726               rsum[gcol+col] += vabs;
727               rsum[grow+row] += vabs;
728             }
729           }
730         }
731         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
732       }
733       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
734       *norm = 0.0;
735       for (col=0; col<mat->cmap->N; col++) {
736         if (rsum2[col] > *norm) *norm = rsum2[col];
737       }
738       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
739     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748   PetscInt       nstash,reallocs;
749 
750   PetscFunctionBegin;
751   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
752 
753   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
754   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
755   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
756   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
757   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
758   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
/*
   Completes assembly of an MPISBAIJ matrix: drains both stashes (point and
   block) inserting the received values, assembles the local diagonal (A) and
   off-diagonal (B) blocks, coordinates disassembly/reassembly of B across all
   ranks, and on the first final assembly sets up the scatter contexts.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point stash: each received message is a batch of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed entry is a bs2-sized dense block, hence the i*bs2 value offset */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled booleans: result is false iff some rank disassembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* release MatGetRow() work space; it is reallocated lazily on the next MatGetRow() */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
861 
862 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
863 #include <petscdraw.h>
/*
   Viewer backend for ASCII, draw and socket viewers.  The ASCII INFO formats
   print per-rank statistics and return early; otherwise the entire matrix is
   gathered onto rank 0 (as a one-process-sized MPISBAIJ) and viewed there
   sequentially.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals = global point-row indices covered by block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one point column of the bs x bs block; 'a' walks the block column-wise */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          /* NOTE(review): the BAIJ setter (not the SBAIJ one) is used for the B part -- confirm intent */
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
983 
984 /* Used for both MPIBAIJ and MPISBAIJ matrices */
985 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
986 
987 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
988 {
989   PetscErrorCode ierr;
990   PetscBool      iascii,isdraw,issocket,isbinary;
991 
992   PetscFunctionBegin;
993   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
994   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
995   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
996   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
997   if (iascii || isdraw || issocket) {
998     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
999   } else if (isbinary) {
1000     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1001   }
1002   PetscFunctionReturn(0);
1003 }
1004 
/*
   Destroys an MPISBAIJ matrix: frees both sequential blocks, the stashes,
   every work vector and scatter context, the per-type data structure, and
   clears the composed methods installed at creation time.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* the local diagonal (A) and off-diagonal (B) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* work vectors and scatters used by the MatMult/MatMultAdd kernels */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* MatGetRow() scratch space */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* drop the type name and every composed method so the Mat can be retyped */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1061 
/*
   y = A*x for a Hermitian MPISBAIJ matrix using the single combined scatter
   sMvctx: slvec1 is filled with [A_d*x ; 0] and slvec0 with [x ; B^H*x]
   (slvec0b/slvec1b presumably alias segments of slvec0/slvec1, set up in
   MatSetUpMultiply_MPISBAIJ -- confirm); the scatter accumulates slvec0 into
   slvec1, and a final multadd applies the superdiagonal part via B.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: conjugate transpose of B applied to x */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1092 
1093 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1094 {
1095   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1096   PetscErrorCode    ierr;
1097   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1098   PetscScalar       *from;
1099   const PetscScalar *x;
1100 
1101   PetscFunctionBegin;
1102   /* diagonal part */
1103   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1104   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1105 
1106   /* subdiagonal part */
1107   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1108 
1109   /* copy x into the vec slvec0 */
1110   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1111   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1112 
1113   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1114   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1115   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1116 
1117   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1118   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1119   /* supperdiagonal part */
1120   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   PetscErrorCode ierr;
1128   PetscInt       nt;
1129 
1130   PetscFunctionBegin;
1131   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1132   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1133 
1134   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1135   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1136 
1137   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1138   /* do diagonal part */
1139   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1140   /* do supperdiagonal part */
1141   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1143   /* do subdiagonal part */
1144   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1145   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1146   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
/*
   z = y + A*x for a Hermitian MPISBAIJ matrix; same slvec0/slvec1 scheme as
   MatMult_MPISBAIJ_Hermitian() except the diagonal block performs a multadd
   with yy so the addition is folded in.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar       *from,zero=0.0;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* xx's read array is released only after the scatter has been started */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1180 
1181 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1182 {
1183   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1184   PetscErrorCode    ierr;
1185   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1186   PetscScalar       *from,zero=0.0;
1187   const PetscScalar *x;
1188 
1189   PetscFunctionBegin;
1190   /* diagonal part */
1191   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1192   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1193 
1194   /* subdiagonal part */
1195   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1196 
1197   /* copy x into the vec slvec0 */
1198   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1199   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1200   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1201   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1202 
1203   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1204   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1205   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206 
1207   /* supperdiagonal part */
1208   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
/*
   z = y + A*x using the two-scatter (Mvctx) implementation: diagonal block,
   superdiagonal contribution from the forward-scattered ghost values, then
   the subdiagonal contribution scattered back in reverse with ADD_VALUES.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1231 
1232 /*
1233   This only works correctly for square matrices where the subblock A->A is the
1234    diagonal block
1235 */
1236 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1237 {
1238   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1243   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1248 {
1249   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1254   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
/*
   Extracts global point row 'row' of an MPISBAIJ matrix, merging the entries
   of the local symmetric block A with those of the off-diagonal block B into
   a single list sorted by global column index.  Must be paired with
   MatRestoreRow_MPISBAIJ().  NOTE(review): A is a SeqSBAIJ block storing only
   the upper triangle, so the returned row presumably contains only the stored
   (upper-triangular) part of the symmetric block -- confirm with callers.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request only the work arrays the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested, so the split point imark must be found here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1336 
1337 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1338 {
1339   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1340 
1341   PetscFunctionBegin;
1342   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1343   baij->getrowactive = PETSC_FALSE;
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1348 {
1349   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1350   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1351 
1352   PetscFunctionBegin;
1353   aA->getrow_utriangular = PETSC_TRUE;
1354   PetscFunctionReturn(0);
1355 }
1356 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1357 {
1358   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1359   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1360 
1361   PetscFunctionBegin;
1362   aA->getrow_utriangular = PETSC_FALSE;
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1367 {
1368   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1373   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1378 {
1379   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1384   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1389    Input: isrow       - distributed(parallel),
1390           iscol_local - locally owned (seq)
1391 */
1392 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1393 {
1394   PetscErrorCode ierr;
1395   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1396   const PetscInt *ptr1,*ptr2;
1397 
1398   PetscFunctionBegin;
1399   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1400   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1401   if (sz1 > sz2) {
1402     *flg = PETSC_FALSE;
1403     PetscFunctionReturn(0);
1404   }
1405 
1406   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1407   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1408 
1409   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1410   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1411   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1412   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1413   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1414   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1415 
1416   nmatch=0;
1417   k     = 0;
1418   for (i=0; i<sz1; i++){
1419     for (j=k; j<sz2; j++){
1420       if (a1[i] == a2[j]) {
1421         k = j; nmatch++;
1422         break;
1423       }
1424     }
1425   }
1426   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1427   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1428   ierr = PetscFree(a1);CHKERRQ(ierr);
1429   ierr = PetscFree(a2);CHKERRQ(ierr);
1430   if (nmatch < sz1) {
1431     *flg = PETSC_FALSE;
1432   } else {
1433     *flg = PETSC_TRUE;
1434   }
1435   PetscFunctionReturn(0);
1436 }
1437 
/*
   Creates a submatrix of an MPISBAIJ matrix.  Because the symmetric format
   stores only half the matrix, extraction is supported only when the gathered
   column IS equals the row IS; the real work is delegated to
   MatCreateSubMatrix_MPIBAIJ_Private().
*/
PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;
  PetscBool      isequal;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* reuse the gathered column IS stashed on the submatrix by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
    ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
    if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
  }

  /* now call MatCreateSubMatrix_MPIBAIJ() */
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* composing takes a reference, so the destroy below does not free iscol_local */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1464 
1465 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1466 {
1467   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1468   PetscErrorCode ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1472   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1477 {
1478   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1479   Mat            A  = a->A,B = a->B;
1480   PetscErrorCode ierr;
1481   PetscLogDouble isend[5],irecv[5];
1482 
1483   PetscFunctionBegin;
1484   info->block_size = (PetscReal)matin->rmap->bs;
1485 
1486   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1487 
1488   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1489   isend[3] = info->memory;  isend[4] = info->mallocs;
1490 
1491   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1492 
1493   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1494   isend[3] += info->memory;  isend[4] += info->mallocs;
1495   if (flag == MAT_LOCAL) {
1496     info->nz_used      = isend[0];
1497     info->nz_allocated = isend[1];
1498     info->nz_unneeded  = isend[2];
1499     info->memory       = isend[3];
1500     info->mallocs      = isend[4];
1501   } else if (flag == MAT_GLOBAL_MAX) {
1502     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1503 
1504     info->nz_used      = irecv[0];
1505     info->nz_allocated = irecv[1];
1506     info->nz_unneeded  = irecv[2];
1507     info->memory       = irecv[3];
1508     info->mallocs      = irecv[4];
1509   } else if (flag == MAT_GLOBAL_SUM) {
1510     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1511 
1512     info->nz_used      = irecv[0];
1513     info->nz_allocated = irecv[1];
1514     info->nz_unneeded  = irecv[2];
1515     info->memory       = irecv[3];
1516     info->mallocs      = irecv[4];
1517   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1518   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1519   info->fill_ratio_needed = 0;
1520   info->factor_mallocs    = 0;
1521   PetscFunctionReturn(0);
1522 }
1523 
/* Sets a matrix option, forwarding it to the local diagonal (SBAIJ) and/or
   off-diagonal (BAIJ) blocks as appropriate for the option's meaning. */
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options that affect both local blocks identically */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* recorded here too, since the parallel MatSetValues path consults it */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip the stash machinery entirely; entries destined off-process are dropped */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* need different mat-vec ops: Hermitian A means A^T y uses conjugation */
      A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
      A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
      A->ops->multtranspose    = NULL;
      A->ops->multtransposeadd = NULL;
      A->symmetric = PETSC_FALSE;
    }
#endif
    break;
  case MAT_SPD:
  case MAT_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    if (flg) { /* restore to use default mat-vec ops (transpose == mult for symmetric) */
      A->ops->mult             = MatMult_MPISBAIJ;
      A->ops->multadd          = MatMultAdd_MPISBAIJ;
      A->ops->multtranspose    = MatMult_MPISBAIJ;
      A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
    }
#endif
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* an SBAIJ matrix cannot stop being symmetric */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): this writes the same flag as MAT_IGNORE_LOWER_TRIANGULAR —
       confirm this is intended rather than a dedicated error flag */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1607 
1608 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1609 {
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   if (reuse == MAT_INITIAL_MATRIX) {
1614     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1615   }  else if (reuse == MAT_REUSE_MATRIX) {
1616     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
/* Symmetric diagonal scaling D*A*D: for the SBAIJ format the left and right
   scaling vectors must be identical so that symmetry is preserved. */
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* ll and rr may be distinct Vec objects as long as their contents agree */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start communicating the ghost values of rr; local scaling overlaps the scatter */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part: needs the ghost values, so finish the scatter first */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1655 
1656 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1657 {
1658   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1659   PetscErrorCode ierr;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1667 
1668 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1669 {
1670   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1671   Mat            a,b,c,d;
1672   PetscBool      flg;
1673   PetscErrorCode ierr;
1674 
1675   PetscFunctionBegin;
1676   a = matA->A; b = matA->B;
1677   c = matB->A; d = matB->B;
1678 
1679   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1680   if (flg) {
1681     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1682   }
1683   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1688 {
1689   PetscErrorCode ierr;
1690   PetscBool      isbaij;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1694   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1695   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1696   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1697     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1698     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1699     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1700   } else {
1701     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1702     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1703 
1704     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1705     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1706   }
1707   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
/* Default MatSetUp(): preallocates with PETSC_DEFAULT nonzero counts using the
   block size already stored in the row layout. */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1719 
/* Computes Y = a*X + Y.  Three cases:
   - SAME_NONZERO_PATTERN: direct BLAS axpy on the stored value arrays;
   - SUBSET_NONZERO_PATTERN: generic row-based update;
   - otherwise: build a new matrix with the union pattern and replace Y in place. */
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: update the diagonal-block values in one axpy ... */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* ... and the off-diagonal-block values in another */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    /* generic update reads rows of X, which requires upper-triangular row access */
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    /* different patterns: create B with the union preallocation, accumulate into
       it, then replace Y's internals with B's via MatHeaderReplace() */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1769 
1770 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1771 {
1772   PetscErrorCode ierr;
1773   PetscInt       i;
1774   PetscBool      flg;
1775 
1776   PetscFunctionBegin;
1777   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1778   for (i=0; i<n; i++) {
1779     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1780     if (!flg) {
1781       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1782     }
1783   }
1784   PetscFunctionReturn(0);
1785 }
1786 
/* Computes Y = Y + a*I.  Ensures the (block) diagonal is allocated before
   delegating to the generic shift. */
PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
  Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    /* never preallocated: reserve one block per block row for the diagonal */
    ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
  } else if (!aij->nz) {
    /* preallocated but empty diagonal block: re-preallocate it, preserving the
       user's new-nonzero policy which the call would otherwise reset */
    PetscInt nonew = aij->nonew;
    ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
    aij->nonew = nonew;
  }
  ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1804 
1805 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1806 {
1807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1812   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1813   if (d) {
1814     PetscInt rstart;
1815     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1816     *d += rstart/A->rmap->bs;
1817 
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 
/* Returns a borrowed reference to the local diagonal (SBAIJ) block; the caller
   must not destroy it. */
PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPISBAIJ*)A->data)->A;
  PetscFunctionReturn(0);
}
1828 
1829 /* -------------------------------------------------------------------*/
/* Function table for the MPISBAIJ type, indexed by the MATOP_* enumeration;
   a 0 entry means the operation is not supported by this type.  The numeric
   comments mark the index of the following entry. */
static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
                                       MatGetRow_MPISBAIJ,
                                       MatRestoreRow_MPISBAIJ,
                                       MatMult_MPISBAIJ,
                               /*  4*/ MatMultAdd_MPISBAIJ,
                                       MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
                                       MatMultAdd_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                               /* 10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPISBAIJ,
                                       MatTranspose_MPISBAIJ,
                               /* 15*/ MatGetInfo_MPISBAIJ,
                                       MatEqual_MPISBAIJ,
                                       MatGetDiagonal_MPISBAIJ,
                                       MatDiagonalScale_MPISBAIJ,
                                       MatNorm_MPISBAIJ,
                               /* 20*/ MatAssemblyBegin_MPISBAIJ,
                                       MatAssemblyEnd_MPISBAIJ,
                                       MatSetOption_MPISBAIJ,
                                       MatZeroEntries_MPISBAIJ,
                               /* 24*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 29*/ MatSetUp_MPISBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPISBAIJ,
                                       0,
                               /* 34*/ MatDuplicate_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 39*/ MatAXPY_MPISBAIJ,
                                       MatCreateSubMatrices_MPISBAIJ,
                                       MatIncreaseOverlap_MPISBAIJ,
                                       MatGetValues_MPISBAIJ,
                                       MatCopy_MPISBAIJ,
                               /* 44*/ 0,
                                       MatScale_MPISBAIJ,
                                       MatShift_MPISBAIJ,
                                       0,
                                       0,
                               /* 49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 54*/ 0,
                                       0,
                                       MatSetUnfactored_MPISBAIJ,
                                       0,
                                       MatSetValuesBlocked_MPISBAIJ,
                               /* 59*/ MatCreateSubMatrix_MPISBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
                                       0,
                                       MatConvert_MPISBAIJ_Basic,
                                       0,
                                       0,
                               /* 74*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPISBAIJ,
                               /* 84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /* 99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*104*/ 0,
                                       MatRealPart_MPISBAIJ,
                                       MatImaginaryPart_MPISBAIJ,
                                       MatGetRowUpperTriangular_MPISBAIJ,
                                       MatRestoreRowUpperTriangular_MPISBAIJ,
                               /*109*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPISBAIJ,
                               /*114*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*119*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*124*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
};
1976 
1977 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1978 {
1979   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1980   PetscErrorCode ierr;
1981   PetscInt       i,mbs,Mbs;
1982   PetscMPIInt    size;
1983 
1984   PetscFunctionBegin;
1985   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1986   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1987   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1988   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1989   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1990   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1991 
1992   mbs = B->rmap->n/bs;
1993   Mbs = B->rmap->N/bs;
1994   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1995 
1996   B->rmap->bs = bs;
1997   b->bs2      = bs*bs;
1998   b->mbs      = mbs;
1999   b->Mbs      = Mbs;
2000   b->nbs      = B->cmap->n/bs;
2001   b->Nbs      = B->cmap->N/bs;
2002 
2003   for (i=0; i<=b->size; i++) {
2004     b->rangebs[i] = B->rmap->range[i]/bs;
2005   }
2006   b->rstartbs = B->rmap->rstart/bs;
2007   b->rendbs   = B->rmap->rend/bs;
2008 
2009   b->cstartbs = B->cmap->rstart/bs;
2010   b->cendbs   = B->cmap->rend/bs;
2011 
2012 #if defined(PETSC_USE_CTABLE)
2013   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2014 #else
2015   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2016 #endif
2017   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2018   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2019   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2020   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2021   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2022   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2023   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2024   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2025   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2026 
2027   /* Because the B will have been resized we simply destroy it and create a new one each time */
2028   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2029   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2030   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2031   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2032   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2033   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2034 
2035   if (!B->preallocated) {
2036     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2037     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2038     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2039     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2040     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2041   }
2042 
2043   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2044   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2045 
2046   B->preallocated  = PETSC_TRUE;
2047   B->was_assembled = PETSC_FALSE;
2048   B->assembled     = PETSC_FALSE;
2049   PetscFunctionReturn(0);
2050 }
2051 
/* Preallocates and fills the matrix from block-CSR arrays (ii,jj,V) describing
   the local block rows.  Columns strictly below the block diagonal are counted
   but ignored (MAT_IGNORE_LOWER_TRIANGULAR is set before insertion). */
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cend;
  PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all of these are in units of blocks, not scalar entries */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
    JJ     = jj + ii[i];
    bd     = 0;
    /* bd = number of columns strictly below the diagonal (ignored for SBAIJ) */
    for (j=0; j<nz; j++) {
      if (*JJ >= i + rstart) break;
      JJ++;
      bd++;
    }
    d  = 0;
    /* d = number of columns on/above the diagonal that fall in this process's column range */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d - bd;
    nz       = nz - bd;
    /* track the widest upper-triangular row for sizing the zero-values buffer */
    nz_max = PetscMax(nz_max,nz);
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* NOTE(review): casts away const on V; the array is only read below */
  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros to establish the nonzero structure */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* all insertions above are local, so skip the off-process stash during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2130 
2131 /*MC
2132    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2133    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2134    the matrix is stored.
2135 
2136    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2137    can call MatSetOption(Mat, MAT_HERMITIAN);
2138 
2139    Options Database Keys:
2140 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2141 
2142    Notes:
2143      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
     diagonal portion of the matrix of each process has to be less than or equal to the number of columns.
2145 
2146    Level: beginner
2147 
2148 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2149 M*/
2150 
/* Type constructor for MATMPISBAIJ: installs the function table, initializes
   all per-type state, composes the type-specific methods, and processes the
   hash-table options.  Preallocation happens later via MatMPISBAIJSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;

  /* type-specific methods reachable by name via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_scalapack_C",MatConvert_SBAIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);

  /* the format is symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  B->symmetric_eternal          = PETSC_TRUE;
#if defined(PETSC_USE_COMPLEX)
  /* complex symmetric is not Hermitian unless the user says so via MatSetOption() */
  B->hermitian                  = PETSC_FALSE;
  B->hermitian_set              = PETSC_FALSE;
#else
  B->hermitian                  = PETSC_TRUE;
  B->hermitian_set              = PETSC_TRUE;
#endif

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    /* NOTE(review): the same option key is queried again here as a Real to obtain
       the factor — confirm this double use of "-mat_use_hash_table" is intended */
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2255 
2256 /*MC
2257    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2258 
2259    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2260    and MATMPISBAIJ otherwise.
2261 
2262    Options Database Keys:
2263 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2264 
2265   Level: beginner
2266 
.seealso: MatCreateSBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
2268 M*/
2269 
2270 /*@C
2271    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2272    the user should preallocate the matrix storage by setting the parameters
2273    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2274    performance can be increased by more than a factor of 50.
2275 
2276    Collective on Mat
2277 
2278    Input Parameters:
2279 +  B - the matrix
2280 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2281           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2282 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2283            submatrix  (same for all local rows)
2284 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2286            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2287            for the diagonal entry and set a value even if it is zero.
2288 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2289            submatrix (same for all local rows).
2290 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2291            off-diagonal portion of the local submatrix that is right of the diagonal
2292            (possibly different for each block row) or NULL.
2293 
2294 
2295    Options Database Keys:
2296 +   -mat_no_unroll - uses code that does not unroll the loops in the
2297                      block calculations (much slower)
2298 -   -mat_block_size - size of the blocks to use
2299 
2300    Notes:
2301 
2302    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2304 
2305    If the *_nnz parameter is given then the *_nz parameter is ignored
2306 
2307    Storage Information:
2308    For a square global matrix we define each processor's diagonal portion
2309    to be its local rows and the corresponding columns (a square submatrix);
2310    each processor's off-diagonal portion encompasses the remainder of the
2311    local matrix (a rectangular submatrix).
2312 
2313    The user can specify preallocated storage for the diagonal part of
2314    the local submatrix with either d_nz or d_nnz (not both).  Set
2315    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2316    memory allocation.  Likewise, specify preallocated storage for the
2317    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2318 
2319    You can call MatGetInfo() to get information on how effective the preallocation was;
2320    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2321    You can also run with the option -info and look for messages with the string
2322    malloc in them to see if additional memory allocation was needed.
2323 
2324    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2325    the figure below we depict these three local rows and all columns (0-11).
2326 
2327 .vb
2328            0 1 2 3 4 5 6 7 8 9 10 11
2329           --------------------------
2330    row 3  |. . . d d d o o o o  o  o
2331    row 4  |. . . d d d o o o o  o  o
2332    row 5  |. . . d d d o o o o  o  o
2333           --------------------------
2334 .ve
2335 
2336    Thus, any entries in the d locations are stored in the d (diagonal)
2337    submatrix, and any entries in the o locations are stored in the
2338    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2339    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2340 
2341    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2342    plus the diagonal part of the d matrix,
2343    and o_nz should indicate the number of block nonzeros per row in the o matrix
2344 
2345    In general, for PDE problems in which most nonzeros are near the diagonal,
2346    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2347    or you will get TERRIBLE performance; see the users' manual chapter on
2348    matrices.
2349 
2350    Level: intermediate
2351 
2352 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2353 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Validate the matrix object and ensure the block size is collective
     (all ranks must pass the same bs). */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Forward to the type-specific implementation composed on the object as
     "MatMPISBAIJSetPreallocation_C" (see the composition in the MPISBAIJ
     constructor); silently a no-op for matrix types that do not provide it. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2365 
2366 /*@C
2367    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2368    (block compressed row).  For good matrix assembly performance
2369    the user should preallocate the matrix storage by setting the parameters
2370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2371    performance can be increased by more than a factor of 50.
2372 
2373    Collective
2374 
2375    Input Parameters:
2376 +  comm - MPI communicator
2377 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2378           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2379 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2380            This value should be the same as the local size used in creating the
2381            y vector for the matrix-vector product y = Ax.
2382 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2383            This value should be the same as the local size used in creating the
2384            x vector for the matrix-vector product y = Ax.
2385 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2386 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2387 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2388            submatrix  (same for all local rows)
2389 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local submatrix
           (possibly different for each block row) or NULL.
2392            If you plan to factor the matrix you must leave room for the diagonal entry and
2393            set its value even if it is zero.
2394 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2395            submatrix (same for all local rows).
2396 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2397            off-diagonal portion of the local submatrix (possibly different for
2398            each block row) or NULL.
2399 
2400    Output Parameter:
2401 .  A - the matrix
2402 
2403    Options Database Keys:
2404 +   -mat_no_unroll - uses code that does not unroll the loops in the
2405                      block calculations (much slower)
2406 .   -mat_block_size - size of the blocks to use
2407 -   -mat_mpi - use the parallel matrix data structures even on one processor
2408                (defaults to using SeqBAIJ format on one processor)
2409 
2410    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2411    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2412    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2413 
2414    Notes:
2415    The number of rows and columns must be divisible by blocksize.
2416    This matrix type does not support complex Hermitian operation.
2417 
2418    The user MUST specify either the local or global matrix dimensions
2419    (possibly both).
2420 
2421    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2423 
2424    If the *_nnz parameter is given then the *_nz parameter is ignored
2425 
2426    Storage Information:
2427    For a square global matrix we define each processor's diagonal portion
2428    to be its local rows and the corresponding columns (a square submatrix);
2429    each processor's off-diagonal portion encompasses the remainder of the
2430    local matrix (a rectangular submatrix).
2431 
2432    The user can specify preallocated storage for the diagonal part of
2433    the local submatrix with either d_nz or d_nnz (not both).  Set
2434    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2435    memory allocation.  Likewise, specify preallocated storage for the
2436    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2437 
2438    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2439    the figure below we depict these three local rows and all columns (0-11).
2440 
2441 .vb
2442            0 1 2 3 4 5 6 7 8 9 10 11
2443           --------------------------
2444    row 3  |. . . d d d o o o o  o  o
2445    row 4  |. . . d d d o o o o  o  o
2446    row 5  |. . . d d d o o o o  o  o
2447           --------------------------
2448 .ve
2449 
2450    Thus, any entries in the d locations are stored in the d (diagonal)
2451    submatrix, and any entries in the o locations are stored in the
2452    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2453    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2454 
2455    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2456    plus the diagonal part of the d matrix,
2457    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2458    In general, for PDE problems in which most nonzeros are near the diagonal,
2459    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2460    or you will get TERRIBLE performance; see the users' manual chapter on
2461    matrices.
2462 
2463    Level: intermediate
2464 
2465 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2466 @*/
2467 
2468 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2469 {
2470   PetscErrorCode ierr;
2471   PetscMPIInt    size;
2472 
2473   PetscFunctionBegin;
2474   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2475   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2476   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2477   if (size > 1) {
2478     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2479     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2480   } else {
2481     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2482     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2483   }
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 
/* Duplicate an MPISBAIJ matrix: build a new Mat with the same layout, copy or
   share the internal Mat_MPISBAIJ bookkeeping from the old matrix, duplicate
   the local diagonal (A) and off-diagonal (B) blocks, and rebuild the slvec
   work vectors used by MatMult/MatSOR.  cpvalues controls whether numerical
   values are copied (passed through to the MatDuplicate calls on A and B). */
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* Create the shell of the new matrix with identical sizes/type and shared
     (referenced, not copied) row and column layouts. */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* The duplicate is marked preallocated/assembled up front; the data it
     needs is filled in below rather than via MatAssemblyBegin/End. */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* Copy the block-size-derived counts (bs^2, local/global block rows/cols). */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* Copy communicator/ownership info; per-use scratch state (MatGetRow
     buffers, barray) starts out empty in the duplicate. */
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: keep the flag/factor settings but not the table itself;
     it is rebuilt on demand. */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* Deep-copy the block-row ownership ranges and, when present, the
     global-to-local column map (CTABLE or plain array build). */
  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* Deep-copy garray (local-to-global column map of B) when B has columns. */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* Fresh stash plus duplicated ghost vector and scatter context. */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views into the front (local, bs*mbs
     entries) and back (ghost) parts of slvec1/slvec0: they share storage with
     the parent vectors rather than owning their own arrays. */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  /* the scatter is read-only in use, so sharing it via a reference is safe */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* Duplicate the sequential pieces (values copied per cpvalues) and the
     composed-function list, then hand back the finished duplicate. */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2594 
2595 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2596 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2597 
2598 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2599 {
2600   PetscErrorCode ierr;
2601   PetscBool      isbinary;
2602 
2603   PetscFunctionBegin;
2604   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2605   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2606   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2607   PetscFunctionReturn(0);
2608 }
2609 
2610 /*XXXXX@
2611    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2612 
2613    Input Parameters:
2614 .  mat  - the matrix
2615 .  fact - factor
2616 
2617    Not Collective on Mat, each process can have a different hash factor
2618 
2619    Level: advanced
2620 
2621   Notes:
2622    This can also be set by the command line option: -mat_use_hash_table fact
2623 
2624 .seealso: MatSetOption()
2625 @XXXXX*/
2626 
2627 
/* Row-wise infinity "norm" for an MPISBAIJ matrix: v[row] = max_j |A(row,j)|.
   Because only the upper triangle is stored, each entry of the off-diagonal
   block B contributes both to its local row maximum and (by symmetry) to the
   row owned by the rank holding the corresponding column.  Those transposed
   contributions are accumulated in a global-length work array and forwarded
   with point-to-point messages to the owning (higher-ranked) processes.
   The idx output (column of the maximum) is not implemented. */
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* Start from the diagonal block A: it holds the local upper triangle
     including the diagonal, handled entirely by the sequential routine. */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;   /* CSR data of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships (point row ranges, not block rows) */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed: work[global col] collects
     the column maxima of B, i.e. the symmetric contributions to remote rows */
  ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);

  /* row_max for B; the last rank owns no off-diagonal (strictly-right) block */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): this offsets bj[] by the end of this rank's own row
             range instead of mapping through a->garray - confirm B's column
             indices are laid out so this yields the true global column. */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;   /* row max */
            if (work[col] < atmp) work[col] = atmp;              /* col max (for remote row) */
          }
        }
        bj++;
      }
    }

    /* send values to its owners: only ranks above us can own our B columns
       because B holds the strictly upper (right-of-diagonal) part */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values: one message from every lower rank (rank 0 receives none);
     MPI_ANY_SOURCE is fine since all messages cover the same index range */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values: fold the received per-row maxima into v */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2709 
/* SOR/SSOR relaxation for MPISBAIJ (block size 1 only).  Supported sweeps:
   SOR_APPLY_UPPER, local symmetric (with the off-process coupling folded into
   the right-hand side via the slvec0/slvec1 scatter), single local forward or
   backward sweeps with zero initial guess, and Eisenstat's trick.  All other
   MatSORType combinations raise PETSC_ERR_SUP. */
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the local diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x=0 needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a (slvec0a/slvec0b share storage with slvec0) */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate so the scatter-add below subtracts; slvec0 = [-x ; -B^T x] */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* scatter-add: slvec1a -= remote B^T x pieces, slvec1b = -ghost x */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = B*slvec1b + slvec1a, i.e. bb with the
         full off-process coupling (both triangles) subtracted */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix across calls */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): the diagonal-block product is written into bb1 but the
         AYPX below updates slvec1a - confirm this mismatch is intended. */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale = -1 when omega == 1, so the update simplifies */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix: same slvec0/slvec1 scatter
       pattern as in the symmetric-sweep branch above */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2841 
/* Alternative local symmetric SOR for MPISBAIJ (block size 1) that uses two
   Mvctx communications per iteration (a forward scatter of x and a reverse
   scatter-add of -B^T x) instead of the single sMvctx scatter used by
   MatSOR_MPISBAIJ.  Only SOR_LOCAL_SYMMETRIC_SWEEP is supported. */
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration with x=0 needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* start gathering ghost values of xx; local work is overlapped below */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter-add delivers -B^T x to the owning processes */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x (lvec negated, then multadd) */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2885 
2886 /*@
2887      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2888          CSR format the local rows.
2889 
2890    Collective
2891 
2892    Input Parameters:
2893 +  comm - MPI communicator
2894 .  bs - the block size, only a block size of 1 is supported
2895 .  m - number of local rows (Cannot be PETSC_DECIDE)
2896 .  n - This value should be the same as the local size used in creating the
2897        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2898        calculated if N is given) For square matrices n is almost always m.
2899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that block row of the matrix
2902 .   j - column indices
2903 -   a - matrix values
2904 
2905    Output Parameter:
2906 .   mat - the matrix
2907 
2908    Level: intermediate
2909 
2910    Notes:
2911        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2912      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2913      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2914 
2915        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2916 
2917 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2918           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2919 @*/
2920 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2921 {
2922   PetscErrorCode ierr;
2923 
2924 
2925   PetscFunctionBegin;
2926   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2927   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2928   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2929   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2930   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2931   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 
2936 /*@C
2937    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2938 
2939    Collective
2940 
2941    Input Parameters:
2942 +  B - the matrix
2943 .  bs - the block size
2944 .  i - the indices into j for the start of each local row (starts with zero)
2945 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2946 -  v - optional values in the matrix
2947 
2948    Level: advanced
2949 
2950    Notes:
2951    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2952    and usually the numerical values as well
2953 
2954    Any entries below the diagonal are ignored
2955 
2956 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2957 @*/
2958 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2959 {
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2964   PetscFunctionReturn(0);
2965 }
2966 
2967 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2968 {
2969   PetscErrorCode ierr;
2970   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2971   PetscInt       *indx;
2972   PetscScalar    *values;
2973 
2974   PetscFunctionBegin;
2975   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2976   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2977     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2978     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2979     PetscInt       *bindx,rmax=a->rmax,j;
2980     PetscMPIInt    rank,size;
2981 
2982     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2983     mbs = m/bs; Nbs = N/cbs;
2984     if (n == PETSC_DECIDE) {
2985       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2986     }
2987     nbs = n/cbs;
2988 
2989     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2990     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2991 
2992     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2993     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2994     if (rank == size-1) {
2995       /* Check sum(nbs) = Nbs */
2996       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2997     }
2998 
2999     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3000     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3001     for (i=0; i<mbs; i++) {
3002       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3003       nnz  = nnz/bs;
3004       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3005       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3006       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3007     }
3008     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3009     ierr = PetscFree(bindx);CHKERRQ(ierr);
3010 
3011     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3012     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3013     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3014     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3015     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3016     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3017     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3018   }
3019 
3020   /* numeric phase */
3021   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3022   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3023 
3024   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3025   for (i=0; i<m; i++) {
3026     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3027     Ii   = i + rstart;
3028     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3029     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3030   }
3031   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3032   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036