xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c87ba875e4007ad659b117ea274f03d5f4cd5ea7)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 /* This could be moved to matimpl.h */
12 static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
13 {
14   Mat            preallocator;
15   PetscInt       r,rstart,rend;
16   PetscInt       bs,i,m,n,M,N;
17   PetscBool      cong = PETSC_TRUE;
18   PetscErrorCode ierr;
19 
20   PetscFunctionBegin;
21   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
22   PetscValidLogicalCollectiveInt(B,nm,2);
23   for (i = 0; i < nm; i++) {
24     PetscValidHeaderSpecific(X[i],MAT_CLASSID,3);
25     ierr = PetscLayoutCompare(B->rmap,X[i]->rmap,&cong);CHKERRQ(ierr);
26     if (!cong) SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for different layouts");
27   }
28   PetscValidLogicalCollectiveBool(B,fill,5);
29   ierr = MatGetBlockSize(B,&bs);CHKERRQ(ierr);
30   ierr = MatGetSize(B,&M,&N);CHKERRQ(ierr);
31   ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
32   ierr = MatCreate(PetscObjectComm((PetscObject)B),&preallocator);CHKERRQ(ierr);
33   ierr = MatSetType(preallocator,MATPREALLOCATOR);CHKERRQ(ierr);
34   ierr = MatSetBlockSize(preallocator,bs);CHKERRQ(ierr);
35   ierr = MatSetSizes(preallocator,m,n,M,N);CHKERRQ(ierr);
36   ierr = MatSetUp(preallocator);CHKERRQ(ierr);
37   ierr = MatGetOwnershipRange(preallocator,&rstart,&rend);CHKERRQ(ierr);
38   for (r = rstart; r < rend; ++r) {
39     PetscInt          ncols;
40     const PetscInt    *row;
41     const PetscScalar *vals;
42 
43     for (i = 0; i < nm; i++) {
44       ierr = MatGetRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
45       ierr = MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
46       if (symm && symm[i]) {
47         ierr = MatSetValues(preallocator,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
48       }
49       ierr = MatRestoreRow(X[i],r,&ncols,&row,&vals);CHKERRQ(ierr);
50     }
51   }
52   ierr = MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
53   ierr = MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
54   ierr = MatPreallocatorPreallocate(preallocator,fill,B);CHKERRQ(ierr);
55   ierr = MatDestroy(&preallocator);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 
59 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
60 {
61   Mat            B;
62   PetscErrorCode ierr;
63   PetscInt       r;
64 
65   PetscFunctionBegin;
66   if (reuse != MAT_REUSE_MATRIX) {
67     PetscBool symm = PETSC_TRUE,isdense;
68     PetscInt  bs;
69 
70     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
71     ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
72     ierr = MatSetType(B,newtype);CHKERRQ(ierr);
73     ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
74     ierr = MatSetBlockSize(B,bs);CHKERRQ(ierr);
75     ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
76     ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
77     ierr = PetscObjectTypeCompareAny((PetscObject)B,&isdense,MATSEQDENSE,MATMPIDENSE,MATSEQDENSECUDA,"");CHKERRQ(ierr);
78     if (!isdense) {
79       ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
80       ierr = MatPreallocateWithMats_Private(B,1,&A,&symm,PETSC_TRUE);CHKERRQ(ierr);
81       ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
82     } else {
83       ierr = MatSetUp(B);CHKERRQ(ierr);
84     }
85   } else {
86     B    = *newmat;
87     ierr = MatZeroEntries(B);CHKERRQ(ierr);
88   }
89 
90   ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
91   for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
92     PetscInt          ncols;
93     const PetscInt    *row;
94     const PetscScalar *vals;
95 
96     ierr = MatGetRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
97     ierr = MatSetValues(B,1,&r,ncols,row,vals,INSERT_VALUES);CHKERRQ(ierr);
98 #if defined(PETSC_USE_COMPLEX)
99     if (A->hermitian) {
100       PetscInt i;
101       for (i = 0; i < ncols; i++) {
102         ierr = MatSetValue(B,row[i],r,PetscConj(vals[i]),INSERT_VALUES);CHKERRQ(ierr);
103       }
104     } else {
105       ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
106     }
107 #else
108     ierr = MatSetValues(B,ncols,row,1,&r,vals,INSERT_VALUES);CHKERRQ(ierr);
109 #endif
110     ierr = MatRestoreRow(A,r,&ncols,&row,&vals);CHKERRQ(ierr);
111   }
112   ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
113   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
114   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
115 
116   if (reuse == MAT_INPLACE_MATRIX) {
117     ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
118   } else {
119     *newmat = B;
120   }
121   PetscFunctionReturn(0);
122 }
123 
124 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
125 {
126   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
127   PetscErrorCode ierr;
128 
129   PetscFunctionBegin;
130   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
131   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
136 {
137   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
138   PetscErrorCode ierr;
139 
140   PetscFunctionBegin;
141   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
142   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
147   { \
148     brow = row/bs;  \
149     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150     rmax = aimax[brow]; nrow = ailen[brow]; \
151     bcol = col/bs; \
152     ridx = row % bs; cidx = col % bs; \
153     low  = 0; high = nrow; \
154     while (high-low > 3) { \
155       t = (low+high)/2; \
156       if (rp[t] > bcol) high = t; \
157       else              low  = t; \
158     } \
159     for (_i=low; _i<high; _i++) { \
160       if (rp[_i] > bcol) break; \
161       if (rp[_i] == bcol) { \
162         bap = ap + bs2*_i + bs*cidx + ridx; \
163         if (addv == ADD_VALUES) *bap += value;  \
164         else                    *bap  = value;  \
165         goto a_noinsert; \
166       } \
167     } \
168     if (a->nonew == 1) goto a_noinsert; \
169     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
170     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
171     N = nrow++ - 1;  \
172     /* shift up all the later entries in this row */ \
173     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
174     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
175     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
176     rp[_i]                      = bcol;  \
177     ap[bs2*_i + bs*cidx + ridx] = value;  \
178     A->nonzerostate++;\
179 a_noinsert:; \
180     ailen[brow] = nrow; \
181   }
182 
183 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
184   { \
185     brow = row/bs;  \
186     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
187     rmax = bimax[brow]; nrow = bilen[brow]; \
188     bcol = col/bs; \
189     ridx = row % bs; cidx = col % bs; \
190     low  = 0; high = nrow; \
191     while (high-low > 3) { \
192       t = (low+high)/2; \
193       if (rp[t] > bcol) high = t; \
194       else              low  = t; \
195     } \
196     for (_i=low; _i<high; _i++) { \
197       if (rp[_i] > bcol) break; \
198       if (rp[_i] == bcol) { \
199         bap = ap + bs2*_i + bs*cidx + ridx; \
200         if (addv == ADD_VALUES) *bap += value;  \
201         else                    *bap  = value;  \
202         goto b_noinsert; \
203       } \
204     } \
205     if (b->nonew == 1) goto b_noinsert; \
206     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
207     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
208     N = nrow++ - 1;  \
209     /* shift up all the later entries in this row */ \
210     ierr  = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr); \
211     ierr  = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
212     ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \
213     rp[_i]                      = bcol;  \
214     ap[bs2*_i + bs*cidx + ridx] = value;  \
215     B->nonzerostate++;\
216 b_noinsert:; \
217     bilen[brow] = nrow; \
218   }
219 
220 /* Only add/insert a(i,j) with i<=j (blocks).
221    Any a(i,j) with i>j input by user is ingored or generates an error
222 */
223 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
224 {
225   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
226   MatScalar      value;
227   PetscBool      roworiented = baij->roworiented;
228   PetscErrorCode ierr;
229   PetscInt       i,j,row,col;
230   PetscInt       rstart_orig=mat->rmap->rstart;
231   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
232   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
233 
234   /* Some Variables required in the macro */
235   Mat          A     = baij->A;
236   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
237   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
238   MatScalar    *aa   =a->a;
239 
240   Mat         B     = baij->B;
241   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
242   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
243   MatScalar   *ba   =b->a;
244 
245   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
246   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
247   MatScalar *ap,*bap;
248 
249   /* for stash */
250   PetscInt  n_loc, *in_loc = NULL;
251   MatScalar *v_loc = NULL;
252 
253   PetscFunctionBegin;
254   if (!baij->donotstash) {
255     if (n > baij->n_loc) {
256       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
257       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
258       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
259       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
260 
261       baij->n_loc = n;
262     }
263     in_loc = baij->in_loc;
264     v_loc  = baij->v_loc;
265   }
266 
267   for (i=0; i<m; i++) {
268     if (im[i] < 0) continue;
269 #if defined(PETSC_USE_DEBUG)
270     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
271 #endif
272     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
273       row = im[i] - rstart_orig;              /* local row index */
274       for (j=0; j<n; j++) {
275         if (im[i]/bs > in[j]/bs) {
276           if (a->ignore_ltriangular) {
277             continue;    /* ignore lower triangular blocks */
278           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
279         }
280         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
281           col  = in[j] - cstart_orig;         /* local col index */
282           brow = row/bs; bcol = col/bs;
283           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
284           if (roworiented) value = v[i*n+j];
285           else             value = v[i+j*m];
286           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
287           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
288         } else if (in[j] < 0) continue;
289 #if defined(PETSC_USE_DEBUG)
290         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
291 #endif
292         else {  /* off-diag entry (B) */
293           if (mat->was_assembled) {
294             if (!baij->colmap) {
295               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
296             }
297 #if defined(PETSC_USE_CTABLE)
298             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
299             col  = col - 1;
300 #else
301             col = baij->colmap[in[j]/bs] - 1;
302 #endif
303             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
304               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
305               col  =  in[j];
306               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
307               B    = baij->B;
308               b    = (Mat_SeqBAIJ*)(B)->data;
309               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
310               ba   = b->a;
311             } else col += in[j]%bs;
312           } else col = in[j];
313           if (roworiented) value = v[i*n+j];
314           else             value = v[i+j*m];
315           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
316           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
317         }
318       }
319     } else {  /* off processor entry */
320       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
321       if (!baij->donotstash) {
322         mat->assembled = PETSC_FALSE;
323         n_loc          = 0;
324         for (j=0; j<n; j++) {
325           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
326           in_loc[n_loc] = in[j];
327           if (roworiented) {
328             v_loc[n_loc] = v[i*n+j];
329           } else {
330             v_loc[n_loc] = v[j*m+i];
331           }
332           n_loc++;
333         }
334         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
335       }
336     }
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
342 {
343   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
344   PetscErrorCode    ierr;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
348   PetscBool         roworiented=a->roworiented;
349   const PetscScalar *value     = v;
350   MatScalar         *ap,*aa = a->a,*bap;
351 
352   PetscFunctionBegin;
353   if (col < row) {
354     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
355     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
356   }
357   rp   = aj + ai[row];
358   ap   = aa + bs2*ai[row];
359   rmax = imax[row];
360   nrow = ailen[row];
361   value = v;
362   low   = 0;
363   high  = nrow;
364 
365   while (high-low > 7) {
366     t = (low+high)/2;
367     if (rp[t] > col) high = t;
368     else             low  = t;
369   }
370   for (i=low; i<high; i++) {
371     if (rp[i] > col) break;
372     if (rp[i] == col) {
373       bap = ap +  bs2*i;
374       if (roworiented) {
375         if (is == ADD_VALUES) {
376           for (ii=0; ii<bs; ii++) {
377             for (jj=ii; jj<bs2; jj+=bs) {
378               bap[jj] += *value++;
379             }
380           }
381         } else {
382           for (ii=0; ii<bs; ii++) {
383             for (jj=ii; jj<bs2; jj+=bs) {
384               bap[jj] = *value++;
385             }
386           }
387         }
388       } else {
389         if (is == ADD_VALUES) {
390           for (ii=0; ii<bs; ii++) {
391             for (jj=0; jj<bs; jj++) {
392               *bap++ += *value++;
393             }
394           }
395         } else {
396           for (ii=0; ii<bs; ii++) {
397             for (jj=0; jj<bs; jj++) {
398               *bap++  = *value++;
399             }
400           }
401         }
402       }
403       goto noinsert2;
404     }
405   }
406   if (nonew == 1) goto noinsert2;
407   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
408   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
409   N = nrow++ - 1; high++;
410   /* shift up all the later entries in this row */
411   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
412   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
413   rp[i] = col;
414   bap   = ap +  bs2*i;
415   if (roworiented) {
416     for (ii=0; ii<bs; ii++) {
417       for (jj=ii; jj<bs2; jj+=bs) {
418         bap[jj] = *value++;
419       }
420     }
421   } else {
422     for (ii=0; ii<bs; ii++) {
423       for (jj=0; jj<bs; jj++) {
424         *bap++ = *value++;
425       }
426     }
427   }
428   noinsert2:;
429   ailen[row] = nrow;
430   PetscFunctionReturn(0);
431 }
432 
433 /*
434    This routine is exactly duplicated in mpibaij.c
435 */
436 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
437 {
438   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
439   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
440   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
441   PetscErrorCode    ierr;
442   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
443   PetscBool         roworiented=a->roworiented;
444   const PetscScalar *value     = v;
445   MatScalar         *ap,*aa = a->a,*bap;
446 
447   PetscFunctionBegin;
448   rp   = aj + ai[row];
449   ap   = aa + bs2*ai[row];
450   rmax = imax[row];
451   nrow = ailen[row];
452   low  = 0;
453   high = nrow;
454   value = v;
455   while (high-low > 7) {
456     t = (low+high)/2;
457     if (rp[t] > col) high = t;
458     else             low  = t;
459   }
460   for (i=low; i<high; i++) {
461     if (rp[i] > col) break;
462     if (rp[i] == col) {
463       bap = ap +  bs2*i;
464       if (roworiented) {
465         if (is == ADD_VALUES) {
466           for (ii=0; ii<bs; ii++) {
467             for (jj=ii; jj<bs2; jj+=bs) {
468               bap[jj] += *value++;
469             }
470           }
471         } else {
472           for (ii=0; ii<bs; ii++) {
473             for (jj=ii; jj<bs2; jj+=bs) {
474               bap[jj] = *value++;
475             }
476           }
477         }
478       } else {
479         if (is == ADD_VALUES) {
480           for (ii=0; ii<bs; ii++,value+=bs) {
481             for (jj=0; jj<bs; jj++) {
482               bap[jj] += value[jj];
483             }
484             bap += bs;
485           }
486         } else {
487           for (ii=0; ii<bs; ii++,value+=bs) {
488             for (jj=0; jj<bs; jj++) {
489               bap[jj]  = value[jj];
490             }
491             bap += bs;
492           }
493         }
494       }
495       goto noinsert2;
496     }
497   }
498   if (nonew == 1) goto noinsert2;
499   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
500   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
501   N = nrow++ - 1; high++;
502   /* shift up all the later entries in this row */
503   ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
504   ierr  = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
505   rp[i] = col;
506   bap   = ap +  bs2*i;
507   if (roworiented) {
508     for (ii=0; ii<bs; ii++) {
509       for (jj=ii; jj<bs2; jj+=bs) {
510         bap[jj] = *value++;
511       }
512     }
513   } else {
514     for (ii=0; ii<bs; ii++) {
515       for (jj=0; jj<bs; jj++) {
516         *bap++ = *value++;
517       }
518     }
519   }
520   noinsert2:;
521   ailen[row] = nrow;
522   PetscFunctionReturn(0);
523 }
524 
525 /*
526     This routine could be optimized by removing the need for the block copy below and passing stride information
527   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
528 */
529 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
530 {
531   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
532   const MatScalar *value;
533   MatScalar       *barray     =baij->barray;
534   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
535   PetscErrorCode  ierr;
536   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
537   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
538   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
539 
540   PetscFunctionBegin;
541   if (!barray) {
542     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
543     baij->barray = barray;
544   }
545 
546   if (roworiented) {
547     stepval = (n-1)*bs;
548   } else {
549     stepval = (m-1)*bs;
550   }
551   for (i=0; i<m; i++) {
552     if (im[i] < 0) continue;
553 #if defined(PETSC_USE_DEBUG)
554     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
555 #endif
556     if (im[i] >= rstart && im[i] < rend) {
557       row = im[i] - rstart;
558       for (j=0; j<n; j++) {
559         if (im[i] > in[j]) {
560           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
561           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
562         }
563         /* If NumCol = 1 then a copy is not required */
564         if ((roworiented) && (n == 1)) {
565           barray = (MatScalar*) v + i*bs2;
566         } else if ((!roworiented) && (m == 1)) {
567           barray = (MatScalar*) v + j*bs2;
568         } else { /* Here a copy is required */
569           if (roworiented) {
570             value = v + i*(stepval+bs)*bs + j*bs;
571           } else {
572             value = v + j*(stepval+bs)*bs + i*bs;
573           }
574           for (ii=0; ii<bs; ii++,value+=stepval) {
575             for (jj=0; jj<bs; jj++) {
576               *barray++ = *value++;
577             }
578           }
579           barray -=bs2;
580         }
581 
582         if (in[j] >= cstart && in[j] < cend) {
583           col  = in[j] - cstart;
584           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
585         } else if (in[j] < 0) continue;
586 #if defined(PETSC_USE_DEBUG)
587         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
588 #endif
589         else {
590           if (mat->was_assembled) {
591             if (!baij->colmap) {
592               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
593             }
594 
595 #if defined(PETSC_USE_DEBUG)
596 #if defined(PETSC_USE_CTABLE)
597             { PetscInt data;
598               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
599               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
600             }
601 #else
602             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
603 #endif
604 #endif
605 #if defined(PETSC_USE_CTABLE)
606             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
607             col  = (col - 1)/bs;
608 #else
609             col = (baij->colmap[in[j]] - 1)/bs;
610 #endif
611             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
612               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
613               col  = in[j];
614             }
615           } else col = in[j];
616           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
617         }
618       }
619     } else {
620       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
621       if (!baij->donotstash) {
622         if (roworiented) {
623           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
624         } else {
625           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
626         }
627       }
628     }
629   }
630   PetscFunctionReturn(0);
631 }
632 
633 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
634 {
635   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
636   PetscErrorCode ierr;
637   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
638   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
639 
640   PetscFunctionBegin;
641   for (i=0; i<m; i++) {
642     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
643     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
644     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
645       row = idxm[i] - bsrstart;
646       for (j=0; j<n; j++) {
647         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
648         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
649         if (idxn[j] >= bscstart && idxn[j] < bscend) {
650           col  = idxn[j] - bscstart;
651           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
652         } else {
653           if (!baij->colmap) {
654             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
655           }
656 #if defined(PETSC_USE_CTABLE)
657           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
658           data--;
659 #else
660           data = baij->colmap[idxn[j]/bs]-1;
661 #endif
662           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
663           else {
664             col  = data + idxn[j]%bs;
665             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
666           }
667         }
668       }
669     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
670   }
671   PetscFunctionReturn(0);
672 }
673 
674 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
675 {
676   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode ierr;
678   PetscReal      sum[2],*lnorm2;
679 
680   PetscFunctionBegin;
681   if (baij->size == 1) {
682     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
683   } else {
684     if (type == NORM_FROBENIUS) {
685       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
686       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
687       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
688       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
689       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
690       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
691       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
692       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
693     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
694       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
695       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
696       PetscReal    *rsum,*rsum2,vabs;
697       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
698       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
699       MatScalar    *v;
700 
701       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
702       ierr = PetscArrayzero(rsum,mat->cmap->N);CHKERRQ(ierr);
703       /* Amat */
704       v = amat->a; jj = amat->j;
705       for (brow=0; brow<mbs; brow++) {
706         grow = bs*(rstart + brow);
707         nz   = amat->i[brow+1] - amat->i[brow];
708         for (bcol=0; bcol<nz; bcol++) {
709           gcol = bs*(rstart + *jj); jj++;
710           for (col=0; col<bs; col++) {
711             for (row=0; row<bs; row++) {
712               vabs            = PetscAbsScalar(*v); v++;
713               rsum[gcol+col] += vabs;
714               /* non-diagonal block */
715               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
716             }
717           }
718         }
719         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
720       }
721       /* Bmat */
722       v = bmat->a; jj = bmat->j;
723       for (brow=0; brow<mbs; brow++) {
724         grow = bs*(rstart + brow);
725         nz = bmat->i[brow+1] - bmat->i[brow];
726         for (bcol=0; bcol<nz; bcol++) {
727           gcol = bs*garray[*jj]; jj++;
728           for (col=0; col<bs; col++) {
729             for (row=0; row<bs; row++) {
730               vabs            = PetscAbsScalar(*v); v++;
731               rsum[gcol+col] += vabs;
732               rsum[grow+row] += vabs;
733             }
734           }
735         }
736         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
737       }
738       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
739       *norm = 0.0;
740       for (col=0; col<mat->cmap->N; col++) {
741         if (rsum2[col] > *norm) *norm = rsum2[col];
742       }
743       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
744     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
745   }
746   PetscFunctionReturn(0);
747 }
748 
749 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
750 {
751   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
752   PetscErrorCode ierr;
753   PetscInt       nstash,reallocs;
754 
755   PetscFunctionBegin;
756   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
757 
758   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
759   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
760   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
761   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
762   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
763   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
764   PetscFunctionReturn(0);
765 }
766 
767 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
768 {
769   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
770   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
771   PetscErrorCode ierr;
772   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
773   PetscInt       *row,*col;
774   PetscBool      other_disassembled;
775   PetscMPIInt    n;
776   PetscBool      r1,r2,r3;
777   MatScalar      *val;
778 
779   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
780   PetscFunctionBegin;
781   if (!baij->donotstash &&  !mat->nooffprocentries) {
782     while (1) {
783       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
784       if (!flg) break;
785 
786       for (i=0; i<n;) {
787         /* Now identify the consecutive vals belonging to the same row */
788         for (j=i,rstart=row[j]; j<n; j++) {
789           if (row[j] != rstart) break;
790         }
791         if (j < n) ncols = j-i;
792         else       ncols = n-i;
793         /* Now assemble all these values with a single function call */
794         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
795         i    = j;
796       }
797     }
798     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
799     /* Now process the block-stash. Since the values are stashed column-oriented,
800        set the roworiented flag to column oriented, and after MatSetValues()
801        restore the original flags */
802     r1 = baij->roworiented;
803     r2 = a->roworiented;
804     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
805 
806     baij->roworiented = PETSC_FALSE;
807     a->roworiented    = PETSC_FALSE;
808 
809     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
810     while (1) {
811       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
812       if (!flg) break;
813 
814       for (i=0; i<n;) {
815         /* Now identify the consecutive vals belonging to the same row */
816         for (j=i,rstart=row[j]; j<n; j++) {
817           if (row[j] != rstart) break;
818         }
819         if (j < n) ncols = j-i;
820         else       ncols = n-i;
821         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
822         i    = j;
823       }
824     }
825     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
826 
827     baij->roworiented = r1;
828     a->roworiented    = r2;
829 
830     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
831   }
832 
833   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
834   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
835 
836   /* determine if any processor has disassembled, if so we must
837      also disassemble ourselfs, in order that we may reassemble. */
838   /*
839      if nonzero structure of submatrix B cannot change then we know that
840      no processor disassembled thus we can skip this stuff
841   */
842   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
843     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
844     if (mat->was_assembled && !other_disassembled) {
845       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
846     }
847   }
848 
849   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
850     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
851   }
852   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
853   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
854 
855   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
856 
857   baij->rowvalues = 0;
858 
859   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
860   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
861     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
862     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
863   }
864   PetscFunctionReturn(0);
865 }
866 
867 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
868 #include <petscdraw.h>
869 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
870 {
871   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
872   PetscErrorCode    ierr;
873   PetscInt          bs   = mat->rmap->bs;
874   PetscMPIInt       rank = baij->rank;
875   PetscBool         iascii,isdraw;
876   PetscViewer       sviewer;
877   PetscViewerFormat format;
878 
879   PetscFunctionBegin;
880   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
881   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
882   if (iascii) {
883     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
884     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
885       MatInfo info;
886       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
887       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
888       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
889       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
890       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
891       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
892       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
893       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
894       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
895       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
896       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
897       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
898       PetscFunctionReturn(0);
899     } else if (format == PETSC_VIEWER_ASCII_INFO) {
900       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
901       PetscFunctionReturn(0);
902     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
903       PetscFunctionReturn(0);
904     }
905   }
906 
907   if (isdraw) {
908     PetscDraw draw;
909     PetscBool isnull;
910     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
911     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
912     if (isnull) PetscFunctionReturn(0);
913   }
914 
915   {
916     /* assemble the entire matrix onto first processor. */
917     Mat          A;
918     Mat_SeqSBAIJ *Aloc;
919     Mat_SeqBAIJ  *Bloc;
920     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
921     MatScalar    *a;
922     const char   *matname;
923 
924     /* Should this be the same type as mat? */
925     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
926     if (!rank) {
927       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
928     } else {
929       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
930     }
931     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
932     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
933     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
934     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
938     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
940 
941     for (i=0; i<mbs; i++) {
942       rvals[0] = bs*(baij->rstartbs + i);
943       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
944       for (j=ai[i]; j<ai[i+1]; j++) {
945         col = (baij->cstartbs+aj[j])*bs;
946         for (k=0; k<bs; k++) {
947           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
948           col++;
949           a += bs;
950         }
951       }
952     }
953     /* copy over the B part */
954     Bloc = (Mat_SeqBAIJ*)baij->B->data;
955     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
956     for (i=0; i<mbs; i++) {
957 
958       rvals[0] = bs*(baij->rstartbs + i);
959       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
960       for (j=ai[i]; j<ai[i+1]; j++) {
961         col = baij->garray[aj[j]]*bs;
962         for (k=0; k<bs; k++) {
963           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
964           col++;
965           a += bs;
966         }
967       }
968     }
969     ierr = PetscFree(rvals);CHKERRQ(ierr);
970     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
971     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
972     /*
973        Everyone has to call to draw the matrix since the graphics waits are
974        synchronized across all processors that share the PetscDraw object
975     */
976     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
977     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
978     if (!rank) {
979       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
980       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
981     }
982     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
983     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
984     ierr = MatDestroy(&A);CHKERRQ(ierr);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 /* Used for both MPIBAIJ and MPISBAIJ matrices */
990 #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
991 
992 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
993 {
994   PetscErrorCode ierr;
995   PetscBool      iascii,isdraw,issocket,isbinary;
996 
997   PetscFunctionBegin;
998   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
999   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1000   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1001   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1002   if (iascii || isdraw || issocket) {
1003     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1004   } else if (isbinary) {
1005     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1011 {
1012   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1013   PetscErrorCode ierr;
1014 
1015   PetscFunctionBegin;
1016 #if defined(PETSC_USE_LOG)
1017   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1018 #endif
1019   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1020   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1021   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1022   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1023 #if defined(PETSC_USE_CTABLE)
1024   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1025 #else
1026   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1027 #endif
1028   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1029   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1030   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1031   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1032   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1033   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1034   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1035   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1036   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1037   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1038   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1039   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1040   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1041   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1042   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1043 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1044   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1045 #endif
1046   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1047   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1048   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1049   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1050 
1051   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1052   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1053   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1054   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1055   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1056 #if defined(PETSC_HAVE_ELEMENTAL)
1057   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1058 #endif
1059   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpiaij_C",NULL);CHKERRQ(ierr);
1060   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpibaij_C",NULL);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1065 {
1066   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1067   PetscErrorCode    ierr;
1068   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1069   PetscScalar       *from;
1070   const PetscScalar *x;
1071 
1072   PetscFunctionBegin;
1073   /* diagonal part */
1074   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1075   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1076 
1077   /* subdiagonal part */
1078   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1079 
1080   /* copy x into the vec slvec0 */
1081   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1082   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1083 
1084   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1085   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1086   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1087 
1088   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1089   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1090   /* supperdiagonal part */
1091   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1096 {
1097   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1098   PetscErrorCode    ierr;
1099   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1100   PetscScalar       *from;
1101   const PetscScalar *x;
1102 
1103   PetscFunctionBegin;
1104   /* diagonal part */
1105   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1106   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1107 
1108   /* subdiagonal part */
1109   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1110 
1111   /* copy x into the vec slvec0 */
1112   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1113   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1114 
1115   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1116   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1117   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1118 
1119   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1120   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1121   /* supperdiagonal part */
1122   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1127 {
1128   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1129   PetscErrorCode ierr;
1130   PetscInt       nt;
1131 
1132   PetscFunctionBegin;
1133   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1134   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1135 
1136   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1137   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1138 
1139   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1140   /* do diagonal part */
1141   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1142   /* do supperdiagonal part */
1143   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1145   /* do subdiagonal part */
1146   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1147   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1148   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1149   PetscFunctionReturn(0);
1150 }
1151 
1152 PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy,Vec zz)
1153 {
1154   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1155   PetscErrorCode    ierr;
1156   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1157   PetscScalar       *from,zero=0.0;
1158   const PetscScalar *x;
1159 
1160   PetscFunctionBegin;
1161   /* diagonal part */
1162   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1163   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1164 
1165   /* subdiagonal part */
1166   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1167 
1168   /* copy x into the vec slvec0 */
1169   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1170   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1171   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1172   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1173 
1174   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1175   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1176   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1177 
1178   /* supperdiagonal part */
1179   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1184 {
1185   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1186   PetscErrorCode    ierr;
1187   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1188   PetscScalar       *from,zero=0.0;
1189   const PetscScalar *x;
1190 
1191   PetscFunctionBegin;
1192   /* diagonal part */
1193   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1194   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1195 
1196   /* subdiagonal part */
1197   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1198 
1199   /* copy x into the vec slvec0 */
1200   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1201   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1202   ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
1203   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1204 
1205   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1207   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208 
1209   /* supperdiagonal part */
1210   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1215 {
1216   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218 
1219   PetscFunctionBegin;
1220   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1221   /* do diagonal part */
1222   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1223   /* do supperdiagonal part */
1224   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1225   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1226 
1227   /* do subdiagonal part */
1228   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 /*
1235   This only works correctly for square matrices where the subblock A->A is the
1236    diagonal block
1237 */
1238 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1239 {
1240   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1245   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1250 {
1251   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253 
1254   PetscFunctionBegin;
1255   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1256   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1267   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1278     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1279     PetscInt     max = 1,mbs = mat->mbs,tmp;
1280     for (i=0; i<mbs; i++) {
1281       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1282       if (max < tmp) max = tmp;
1283     }
1284     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1285   }
1286 
1287   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1288   lrow = row - brstart;  /* local row index */
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz  = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1342 
1343   PetscFunctionBegin;
1344   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1345   baij->getrowactive = PETSC_FALSE;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1350 {
1351   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1352   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1353 
1354   PetscFunctionBegin;
1355   aA->getrow_utriangular = PETSC_TRUE;
1356   PetscFunctionReturn(0);
1357 }
1358 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1359 {
1360   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1361   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1362 
1363   PetscFunctionBegin;
1364   aA->getrow_utriangular = PETSC_FALSE;
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1369 {
1370   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1375   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1380 {
1381   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1386   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1391    Input: isrow       - distributed(parallel),
1392           iscol_local - locally owned (seq)
1393 */
1394 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1395 {
1396   PetscErrorCode ierr;
1397   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1398   const PetscInt *ptr1,*ptr2;
1399 
1400   PetscFunctionBegin;
1401   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1402   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1403   if (sz1 > sz2) {
1404     *flg = PETSC_FALSE;
1405     PetscFunctionReturn(0);
1406   }
1407 
1408   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1409   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1410 
1411   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1412   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1413   ierr = PetscArraycpy(a1,ptr1,sz1);CHKERRQ(ierr);
1414   ierr = PetscArraycpy(a2,ptr2,sz2);CHKERRQ(ierr);
1415   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1416   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1417 
1418   nmatch=0;
1419   k     = 0;
1420   for (i=0; i<sz1; i++){
1421     for (j=k; j<sz2; j++){
1422       if (a1[i] == a2[j]) {
1423         k = j; nmatch++;
1424         break;
1425       }
1426     }
1427   }
1428   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1429   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1430   ierr = PetscFree(a1);CHKERRQ(ierr);
1431   ierr = PetscFree(a2);CHKERRQ(ierr);
1432   if (nmatch < sz1) {
1433     *flg = PETSC_FALSE;
1434   } else {
1435     *flg = PETSC_TRUE;
1436   }
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1441 {
1442   PetscErrorCode ierr;
1443   IS             iscol_local;
1444   PetscInt       csize;
1445   PetscBool      isequal;
1446 
1447   PetscFunctionBegin;
1448   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1449   if (call == MAT_REUSE_MATRIX) {
1450     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1451     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1452   } else {
1453     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1454     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1455     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1456   }
1457 
1458   /* now call MatCreateSubMatrix_MPIBAIJ() */
1459   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1460   if (call == MAT_INITIAL_MATRIX) {
1461     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1462     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1468 {
1469   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1470   PetscErrorCode ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1474   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479 {
1480   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1481   Mat            A  = a->A,B = a->B;
1482   PetscErrorCode ierr;
1483   PetscLogDouble isend[5],irecv[5];
1484 
1485   PetscFunctionBegin;
1486   info->block_size = (PetscReal)matin->rmap->bs;
1487 
1488   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1489 
1490   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1491   isend[3] = info->memory;  isend[4] = info->mallocs;
1492 
1493   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1494 
1495   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1496   isend[3] += info->memory;  isend[4] += info->mallocs;
1497   if (flag == MAT_LOCAL) {
1498     info->nz_used      = isend[0];
1499     info->nz_allocated = isend[1];
1500     info->nz_unneeded  = isend[2];
1501     info->memory       = isend[3];
1502     info->mallocs      = isend[4];
1503   } else if (flag == MAT_GLOBAL_MAX) {
1504     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1505 
1506     info->nz_used      = irecv[0];
1507     info->nz_allocated = irecv[1];
1508     info->nz_unneeded  = irecv[2];
1509     info->memory       = irecv[3];
1510     info->mallocs      = irecv[4];
1511   } else if (flag == MAT_GLOBAL_SUM) {
1512     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1513 
1514     info->nz_used      = irecv[0];
1515     info->nz_allocated = irecv[1];
1516     info->nz_unneeded  = irecv[2];
1517     info->memory       = irecv[3];
1518     info->mallocs      = irecv[4];
1519   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1521   info->fill_ratio_needed = 0;
1522   info->factor_mallocs    = 0;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1527 {
1528   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1529   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1530   PetscErrorCode ierr;
1531 
1532   PetscFunctionBegin;
1533   switch (op) {
1534   case MAT_NEW_NONZERO_LOCATIONS:
1535   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1536   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1537   case MAT_KEEP_NONZERO_PATTERN:
1538   case MAT_SUBMAT_SINGLEIS:
1539   case MAT_NEW_NONZERO_LOCATION_ERR:
1540     MatCheckPreallocated(A,1);
1541     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1542     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1543     break;
1544   case MAT_ROW_ORIENTED:
1545     MatCheckPreallocated(A,1);
1546     a->roworiented = flg;
1547 
1548     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1549     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1550     break;
1551   case MAT_NEW_DIAGONALS:
1552   case MAT_SORTED_FULL:
1553     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1554     break;
1555   case MAT_IGNORE_OFF_PROC_ENTRIES:
1556     a->donotstash = flg;
1557     break;
1558   case MAT_USE_HASH_TABLE:
1559     a->ht_flag = flg;
1560     break;
1561   case MAT_HERMITIAN:
1562     MatCheckPreallocated(A,1);
1563     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1564 #if defined(PETSC_USE_COMPLEX)
1565     if (flg) { /* need different mat-vec ops */
1566       A->ops->mult             = MatMult_MPISBAIJ_Hermitian;
1567       A->ops->multadd          = MatMultAdd_MPISBAIJ_Hermitian;
1568       A->ops->multtranspose    = NULL;
1569       A->ops->multtransposeadd = NULL;
1570       A->symmetric = PETSC_FALSE;
1571     }
1572 #endif
1573     break;
1574   case MAT_SPD:
1575   case MAT_SYMMETRIC:
1576     MatCheckPreallocated(A,1);
1577     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1578 #if defined(PETSC_USE_COMPLEX)
1579     if (flg) { /* restore to use default mat-vec ops */
1580       A->ops->mult             = MatMult_MPISBAIJ;
1581       A->ops->multadd          = MatMultAdd_MPISBAIJ;
1582       A->ops->multtranspose    = MatMult_MPISBAIJ;
1583       A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1584     }
1585 #endif
1586     break;
1587   case MAT_STRUCTURALLY_SYMMETRIC:
1588     MatCheckPreallocated(A,1);
1589     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1590     break;
1591   case MAT_SYMMETRY_ETERNAL:
1592     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1593     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1594     break;
1595   case MAT_IGNORE_LOWER_TRIANGULAR:
1596     aA->ignore_ltriangular = flg;
1597     break;
1598   case MAT_ERROR_LOWER_TRIANGULAR:
1599     aA->ignore_ltriangular = flg;
1600     break;
1601   case MAT_GETROW_UPPERTRIANGULAR:
1602     aA->getrow_utriangular = flg;
1603     break;
1604   default:
1605     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1606   }
1607   PetscFunctionReturn(0);
1608 }
1609 
1610 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1611 {
1612   PetscErrorCode ierr;
1613 
1614   PetscFunctionBegin;
1615   if (reuse == MAT_INITIAL_MATRIX) {
1616     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1617   }  else if (reuse == MAT_REUSE_MATRIX) {
1618     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1619   }
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1624 {
1625   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1626   Mat            a     = baij->A, b=baij->B;
1627   PetscErrorCode ierr;
1628   PetscInt       nv,m,n;
1629   PetscBool      flg;
1630 
1631   PetscFunctionBegin;
1632   if (ll != rr) {
1633     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1634     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1635   }
1636   if (!ll) PetscFunctionReturn(0);
1637 
1638   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1639   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1640 
1641   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1642   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1643 
1644   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645 
1646   /* left diagonalscale the off-diagonal part */
1647   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1648 
1649   /* scale the diagonal part */
1650   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1651 
1652   /* right diagonalscale the off-diagonal part */
1653   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1654   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1655   PetscFunctionReturn(0);
1656 }
1657 
1658 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1659 {
1660   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1661   PetscErrorCode ierr;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1669 
1670 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1671 {
1672   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1673   Mat            a,b,c,d;
1674   PetscBool      flg;
1675   PetscErrorCode ierr;
1676 
1677   PetscFunctionBegin;
1678   a = matA->A; b = matA->B;
1679   c = matB->A; d = matB->B;
1680 
1681   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1682   if (flg) {
1683     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1684   }
1685   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1690 {
1691   PetscErrorCode ierr;
1692   PetscBool      isbaij;
1693 
1694   PetscFunctionBegin;
1695   ierr = PetscObjectTypeCompareAny((PetscObject)B,&isbaij,MATSEQSBAIJ,MATMPISBAIJ,"");CHKERRQ(ierr);
1696   if (!isbaij) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)B)->type_name);
1697   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1698   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1699     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1700     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1701     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1702   } else {
1703     Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1704     Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)B->data;
1705 
1706     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1707     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1708   }
1709   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1714 {
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1723 {
1724   PetscErrorCode ierr;
1725   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1726   PetscBLASInt   bnz,one=1;
1727   Mat_SeqSBAIJ   *xa,*ya;
1728   Mat_SeqBAIJ    *xb,*yb;
1729 
1730   PetscFunctionBegin;
1731   if (str == SAME_NONZERO_PATTERN) {
1732     PetscScalar alpha = a;
1733     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1734     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1735     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1736     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1737     xb   = (Mat_SeqBAIJ*)xx->B->data;
1738     yb   = (Mat_SeqBAIJ*)yy->B->data;
1739     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1740     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1741     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1742   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1743     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1744     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1745     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1746   } else {
1747     Mat      B;
1748     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1749     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1750     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1751     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1752     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1753     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1754     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1755     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1756     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1757     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1758     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1759     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1760     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1761     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1762     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1763     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1764     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1765     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1766     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1767     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1773 {
1774   PetscErrorCode ierr;
1775   PetscInt       i;
1776   PetscBool      flg;
1777 
1778   PetscFunctionBegin;
1779   ierr = MatCreateSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1780   for (i=0; i<n; i++) {
1781     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1782     if (!flg) {
1783       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1784     }
1785   }
1786   PetscFunctionReturn(0);
1787 }
1788 
1789 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1790 {
1791   PetscErrorCode ierr;
1792   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1793   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1794 
1795   PetscFunctionBegin;
1796   if (!Y->preallocated) {
1797     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1798   } else if (!aij->nz) {
1799     PetscInt nonew = aij->nonew;
1800     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1801     aij->nonew = nonew;
1802   }
1803   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1808 {
1809   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1814   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1815   if (d) {
1816     PetscInt rstart;
1817     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1818     *d += rstart/A->rmap->bs;
1819 
1820   }
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1825 {
1826   PetscFunctionBegin;
1827   *a = ((Mat_MPISBAIJ*)A->data)->A;
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 /* -------------------------------------------------------------------*/
1832 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1833                                        MatGetRow_MPISBAIJ,
1834                                        MatRestoreRow_MPISBAIJ,
1835                                        MatMult_MPISBAIJ,
1836                                /*  4*/ MatMultAdd_MPISBAIJ,
1837                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1838                                        MatMultAdd_MPISBAIJ,
1839                                        0,
1840                                        0,
1841                                        0,
1842                                /* 10*/ 0,
1843                                        0,
1844                                        0,
1845                                        MatSOR_MPISBAIJ,
1846                                        MatTranspose_MPISBAIJ,
1847                                /* 15*/ MatGetInfo_MPISBAIJ,
1848                                        MatEqual_MPISBAIJ,
1849                                        MatGetDiagonal_MPISBAIJ,
1850                                        MatDiagonalScale_MPISBAIJ,
1851                                        MatNorm_MPISBAIJ,
1852                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1853                                        MatAssemblyEnd_MPISBAIJ,
1854                                        MatSetOption_MPISBAIJ,
1855                                        MatZeroEntries_MPISBAIJ,
1856                                /* 24*/ 0,
1857                                        0,
1858                                        0,
1859                                        0,
1860                                        0,
1861                                /* 29*/ MatSetUp_MPISBAIJ,
1862                                        0,
1863                                        0,
1864                                        MatGetDiagonalBlock_MPISBAIJ,
1865                                        0,
1866                                /* 34*/ MatDuplicate_MPISBAIJ,
1867                                        0,
1868                                        0,
1869                                        0,
1870                                        0,
1871                                /* 39*/ MatAXPY_MPISBAIJ,
1872                                        MatCreateSubMatrices_MPISBAIJ,
1873                                        MatIncreaseOverlap_MPISBAIJ,
1874                                        MatGetValues_MPISBAIJ,
1875                                        MatCopy_MPISBAIJ,
1876                                /* 44*/ 0,
1877                                        MatScale_MPISBAIJ,
1878                                        MatShift_MPISBAIJ,
1879                                        0,
1880                                        0,
1881                                /* 49*/ 0,
1882                                        0,
1883                                        0,
1884                                        0,
1885                                        0,
1886                                /* 54*/ 0,
1887                                        0,
1888                                        MatSetUnfactored_MPISBAIJ,
1889                                        0,
1890                                        MatSetValuesBlocked_MPISBAIJ,
1891                                /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1892                                        0,
1893                                        0,
1894                                        0,
1895                                        0,
1896                                /* 64*/ 0,
1897                                        0,
1898                                        0,
1899                                        0,
1900                                        0,
1901                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1902                                        0,
1903                                        MatConvert_MPISBAIJ_Basic,
1904                                        0,
1905                                        0,
1906                                /* 74*/ 0,
1907                                        0,
1908                                        0,
1909                                        0,
1910                                        0,
1911                                /* 79*/ 0,
1912                                        0,
1913                                        0,
1914                                        0,
1915                                        MatLoad_MPISBAIJ,
1916                                /* 84*/ 0,
1917                                        0,
1918                                        0,
1919                                        0,
1920                                        0,
1921                                /* 89*/ 0,
1922                                        0,
1923                                        0,
1924                                        0,
1925                                        0,
1926                                /* 94*/ 0,
1927                                        0,
1928                                        0,
1929                                        0,
1930                                        0,
1931                                /* 99*/ 0,
1932                                        0,
1933                                        0,
1934                                        0,
1935                                        0,
1936                                /*104*/ 0,
1937                                        MatRealPart_MPISBAIJ,
1938                                        MatImaginaryPart_MPISBAIJ,
1939                                        MatGetRowUpperTriangular_MPISBAIJ,
1940                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1941                                /*109*/ 0,
1942                                        0,
1943                                        0,
1944                                        0,
1945                                        MatMissingDiagonal_MPISBAIJ,
1946                                /*114*/ 0,
1947                                        0,
1948                                        0,
1949                                        0,
1950                                        0,
1951                                /*119*/ 0,
1952                                        0,
1953                                        0,
1954                                        0,
1955                                        0,
1956                                /*124*/ 0,
1957                                        0,
1958                                        0,
1959                                        0,
1960                                        0,
1961                                /*129*/ 0,
1962                                        0,
1963                                        0,
1964                                        0,
1965                                        0,
1966                                /*134*/ 0,
1967                                        0,
1968                                        0,
1969                                        0,
1970                                        0,
1971                                /*139*/ MatSetBlockSizes_Default,
1972                                        0,
1973                                        0,
1974                                        0,
1975                                        0,
1976                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1977 };
1978 
1979 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1980 {
1981   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1982   PetscErrorCode ierr;
1983   PetscInt       i,mbs,Mbs;
1984   PetscMPIInt    size;
1985 
1986   PetscFunctionBegin;
1987   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1988   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1989   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1990   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1991   if (B->rmap->N > B->cmap->N) SETERRQ2(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more rows %D than columns %D",B->rmap->N,B->cmap->N);
1992   if (B->rmap->n > B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"MPISBAIJ matrix cannot have more local rows %D than columns %D",B->rmap->n,B->cmap->n);
1993 
1994   mbs = B->rmap->n/bs;
1995   Mbs = B->rmap->N/bs;
1996   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1997 
1998   B->rmap->bs = bs;
1999   b->bs2      = bs*bs;
2000   b->mbs      = mbs;
2001   b->Mbs      = Mbs;
2002   b->nbs      = B->cmap->n/bs;
2003   b->Nbs      = B->cmap->N/bs;
2004 
2005   for (i=0; i<=b->size; i++) {
2006     b->rangebs[i] = B->rmap->range[i]/bs;
2007   }
2008   b->rstartbs = B->rmap->rstart/bs;
2009   b->rendbs   = B->rmap->rend/bs;
2010 
2011   b->cstartbs = B->cmap->rstart/bs;
2012   b->cendbs   = B->cmap->rend/bs;
2013 
2014 #if defined(PETSC_USE_CTABLE)
2015   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2016 #else
2017   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2018 #endif
2019   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2020   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2021   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2022   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2023   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2024   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2025   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2026   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2027   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2028 
2029   /* Because the B will have been resized we simply destroy it and create a new one each time */
2030   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2031   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2032   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2033   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2034   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2035   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2036 
2037   if (!B->preallocated) {
2038     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2039     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2040     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2041     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2042     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2043   }
2044 
2045   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2046   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2047 
2048   B->preallocated  = PETSC_TRUE;
2049   B->was_assembled = PETSC_FALSE;
2050   B->assembled     = PETSC_FALSE;
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2055 {
2056   PetscInt       m,rstart,cend;
2057   PetscInt       i,j,d,nz,bd, nz_max=0,*d_nnz=0,*o_nnz=0;
2058   const PetscInt *JJ    =0;
2059   PetscScalar    *values=0;
2060   PetscBool      roworiented = ((Mat_MPISBAIJ*)B->data)->roworiented;
2061   PetscErrorCode ierr;
2062   PetscBool      nooffprocentries;
2063 
2064   PetscFunctionBegin;
2065   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2066   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2067   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2068   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2069   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2070   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2071   m      = B->rmap->n/bs;
2072   rstart = B->rmap->rstart/bs;
2073   cend   = B->cmap->rend/bs;
2074 
2075   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2076   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2077   for (i=0; i<m; i++) {
2078     nz = ii[i+1] - ii[i];
2079     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2080     /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2081     JJ     = jj + ii[i];
2082     bd     = 0;
2083     for (j=0; j<nz; j++) {
2084       if (*JJ >= i + rstart) break;
2085       JJ++;
2086       bd++;
2087     }
2088     d  = 0;
2089     for (; j<nz; j++) {
2090       if (*JJ++ >= cend) break;
2091       d++;
2092     }
2093     d_nnz[i] = d;
2094     o_nnz[i] = nz - d - bd;
2095     nz       = nz - bd;
2096     nz_max = PetscMax(nz_max,nz);
2097   }
2098   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2099   ierr = MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2100   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2101 
2102   values = (PetscScalar*)V;
2103   if (!values) {
2104     ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2105   }
2106   for (i=0; i<m; i++) {
2107     PetscInt          row    = i + rstart;
2108     PetscInt          ncols  = ii[i+1] - ii[i];
2109     const PetscInt    *icols = jj + ii[i];
2110     if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2111       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2112       ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2113     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2114       PetscInt j;
2115       for (j=0; j<ncols; j++) {
2116         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2117         ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2118       }
2119     }
2120   }
2121 
2122   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2123   nooffprocentries    = B->nooffprocentries;
2124   B->nooffprocentries = PETSC_TRUE;
2125   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2126   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2127   B->nooffprocentries = nooffprocentries;
2128 
2129   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*MC
2134    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2135    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2136    the matrix is stored.
2137 
2138    For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2139    can call MatSetOption(Mat, MAT_HERMITIAN);
2140 
2141    Options Database Keys:
2142 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2143 
2144    Notes:
2145      The number of rows in the matrix must be less than or equal to the number of columns. Similarly the number of rows in the
2146      diagonal portion of the matrix of each process has to less than or equal the number of columns.
2147 
2148    Level: beginner
2149 
2150 .seealso: MatCreateBAIJ(), MATSEQSBAIJ, MatType
2151 M*/
2152 
2153 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2154 {
2155   Mat_MPISBAIJ   *b;
2156   PetscErrorCode ierr;
2157   PetscBool      flg = PETSC_FALSE;
2158 
2159   PetscFunctionBegin;
2160   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2161   B->data = (void*)b;
2162   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2163 
2164   B->ops->destroy = MatDestroy_MPISBAIJ;
2165   B->ops->view    = MatView_MPISBAIJ;
2166   B->assembled    = PETSC_FALSE;
2167   B->insertmode   = NOT_SET_VALUES;
2168 
2169   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2170   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2171 
2172   /* build local table of row and column ownerships */
2173   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2174 
2175   /* build cache for off array entries formed */
2176   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2177 
2178   b->donotstash  = PETSC_FALSE;
2179   b->colmap      = NULL;
2180   b->garray      = NULL;
2181   b->roworiented = PETSC_TRUE;
2182 
2183   /* stuff used in block assembly */
2184   b->barray = 0;
2185 
2186   /* stuff used for matrix vector multiply */
2187   b->lvec    = 0;
2188   b->Mvctx   = 0;
2189   b->slvec0  = 0;
2190   b->slvec0b = 0;
2191   b->slvec1  = 0;
2192   b->slvec1a = 0;
2193   b->slvec1b = 0;
2194   b->sMvctx  = 0;
2195 
2196   /* stuff for MatGetRow() */
2197   b->rowindices   = 0;
2198   b->rowvalues    = 0;
2199   b->getrowactive = PETSC_FALSE;
2200 
2201   /* hash table stuff */
2202   b->ht           = 0;
2203   b->hd           = 0;
2204   b->ht_size      = 0;
2205   b->ht_flag      = PETSC_FALSE;
2206   b->ht_fact      = 0;
2207   b->ht_total_ct  = 0;
2208   b->ht_insert_ct = 0;
2209 
2210   /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2211   b->ijonly = PETSC_FALSE;
2212 
2213   b->in_loc = 0;
2214   b->v_loc  = 0;
2215   b->n_loc  = 0;
2216 
2217   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2218   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2219   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2220   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2221 #if defined(PETSC_HAVE_ELEMENTAL)
2222   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2223 #endif
2224   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpiaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2225   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpibaij_C",MatConvert_MPISBAIJ_Basic);CHKERRQ(ierr);
2226 
2227   B->symmetric                  = PETSC_TRUE;
2228   B->structurally_symmetric     = PETSC_TRUE;
2229   B->symmetric_set              = PETSC_TRUE;
2230   B->structurally_symmetric_set = PETSC_TRUE;
2231   B->symmetric_eternal          = PETSC_TRUE;
2232 #if defined(PETSC_USE_COMPLEX)
2233   B->hermitian                  = PETSC_FALSE;
2234   B->hermitian_set              = PETSC_FALSE;
2235 #else
2236   B->hermitian                  = PETSC_TRUE;
2237   B->hermitian_set              = PETSC_TRUE;
2238 #endif
2239 
2240   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2241   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2242   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2243   if (flg) {
2244     PetscReal fact = 1.39;
2245     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2246     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2247     if (fact <= 1.0) fact = 1.39;
2248     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2249     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2250   }
2251   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 /*MC
2256    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2257 
2258    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2259    and MATMPISBAIJ otherwise.
2260 
2261    Options Database Keys:
2262 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2263 
2264   Level: beginner
2265 
2266 .seealso: MatCreateMPISBAIJ, MATSEQSBAIJ, MATMPISBAIJ
2267 M*/
2268 
2269 /*@C
2270    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2271    the user should preallocate the matrix storage by setting the parameters
2272    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2273    performance can be increased by more than a factor of 50.
2274 
2275    Collective on Mat
2276 
2277    Input Parameters:
2278 +  B - the matrix
2279 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2280           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2281 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2282            submatrix  (same for all local rows)
2283 .  d_nnz - array containing the number of block nonzeros in the various block rows
2284            in the upper triangular and diagonal part of the in diagonal portion of the local
2285            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2286            for the diagonal entry and set a value even if it is zero.
2287 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2288            submatrix (same for all local rows).
2289 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2290            off-diagonal portion of the local submatrix that is right of the diagonal
2291            (possibly different for each block row) or NULL.
2292 
2293 
2294    Options Database Keys:
2295 +   -mat_no_unroll - uses code that does not unroll the loops in the
2296                      block calculations (much slower)
2297 -   -mat_block_size - size of the blocks to use
2298 
2299    Notes:
2300 
2301    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2302    than it must be used on all processors that share the object for that argument.
2303 
2304    If the *_nnz parameter is given then the *_nz parameter is ignored
2305 
2306    Storage Information:
2307    For a square global matrix we define each processor's diagonal portion
2308    to be its local rows and the corresponding columns (a square submatrix);
2309    each processor's off-diagonal portion encompasses the remainder of the
2310    local matrix (a rectangular submatrix).
2311 
2312    The user can specify preallocated storage for the diagonal part of
2313    the local submatrix with either d_nz or d_nnz (not both).  Set
2314    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2315    memory allocation.  Likewise, specify preallocated storage for the
2316    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2317 
2318    You can call MatGetInfo() to get information on how effective the preallocation was;
2319    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2320    You can also run with the option -info and look for messages with the string
2321    malloc in them to see if additional memory allocation was needed.
2322 
2323    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2324    the figure below we depict these three local rows and all columns (0-11).
2325 
2326 .vb
2327            0 1 2 3 4 5 6 7 8 9 10 11
2328           --------------------------
2329    row 3  |. . . d d d o o o o  o  o
2330    row 4  |. . . d d d o o o o  o  o
2331    row 5  |. . . d d d o o o o  o  o
2332           --------------------------
2333 .ve
2334 
2335    Thus, any entries in the d locations are stored in the d (diagonal)
2336    submatrix, and any entries in the o locations are stored in the
2337    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2338    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2339 
2340    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2341    plus the diagonal part of the d matrix,
2342    and o_nz should indicate the number of block nonzeros per row in the o matrix
2343 
2344    In general, for PDE problems in which most nonzeros are near the diagonal,
2345    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2346    or you will get TERRIBLE performance; see the users' manual chapter on
2347    matrices.
2348 
2349    Level: intermediate
2350 
2351 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2352 @*/
2353 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2354 {
2355   PetscErrorCode ierr;
2356 
2357   PetscFunctionBegin;
2358   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2359   PetscValidType(B,1);
2360   PetscValidLogicalCollectiveInt(B,bs,2);
2361   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 /*@C
2366    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2367    (block compressed row).  For good matrix assembly performance
2368    the user should preallocate the matrix storage by setting the parameters
2369    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2370    performance can be increased by more than a factor of 50.
2371 
2372    Collective
2373 
2374    Input Parameters:
2375 +  comm - MPI communicator
2376 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2377           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2378 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2379            This value should be the same as the local size used in creating the
2380            y vector for the matrix-vector product y = Ax.
2381 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2382            This value should be the same as the local size used in creating the
2383            x vector for the matrix-vector product y = Ax.
2384 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2385 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2386 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2387            submatrix  (same for all local rows)
2388 .  d_nnz - array containing the number of block nonzeros in the various block rows
2389            in the upper triangular portion of the in diagonal portion of the local
2390            (possibly different for each block block row) or NULL.
2391            If you plan to factor the matrix you must leave room for the diagonal entry and
2392            set its value even if it is zero.
2393 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2394            submatrix (same for all local rows).
2395 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2396            off-diagonal portion of the local submatrix (possibly different for
2397            each block row) or NULL.
2398 
2399    Output Parameter:
2400 .  A - the matrix
2401 
2402    Options Database Keys:
2403 +   -mat_no_unroll - uses code that does not unroll the loops in the
2404                      block calculations (much slower)
2405 .   -mat_block_size - size of the blocks to use
2406 -   -mat_mpi - use the parallel matrix data structures even on one processor
2407                (defaults to using SeqBAIJ format on one processor)
2408 
2409    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2410    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2411    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2412 
2413    Notes:
2414    The number of rows and columns must be divisible by blocksize.
2415    This matrix type does not support complex Hermitian operation.
2416 
2417    The user MUST specify either the local or global matrix dimensions
2418    (possibly both).
2419 
2420    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2421    than it must be used on all processors that share the object for that argument.
2422 
2423    If the *_nnz parameter is given then the *_nz parameter is ignored
2424 
2425    Storage Information:
2426    For a square global matrix we define each processor's diagonal portion
2427    to be its local rows and the corresponding columns (a square submatrix);
2428    each processor's off-diagonal portion encompasses the remainder of the
2429    local matrix (a rectangular submatrix).
2430 
2431    The user can specify preallocated storage for the diagonal part of
2432    the local submatrix with either d_nz or d_nnz (not both).  Set
2433    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2434    memory allocation.  Likewise, specify preallocated storage for the
2435    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2436 
2437    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2438    the figure below we depict these three local rows and all columns (0-11).
2439 
2440 .vb
2441            0 1 2 3 4 5 6 7 8 9 10 11
2442           --------------------------
2443    row 3  |. . . d d d o o o o  o  o
2444    row 4  |. . . d d d o o o o  o  o
2445    row 5  |. . . d d d o o o o  o  o
2446           --------------------------
2447 .ve
2448 
2449    Thus, any entries in the d locations are stored in the d (diagonal)
2450    submatrix, and any entries in the o locations are stored in the
2451    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2452    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2453 
2454    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2455    plus the diagonal part of the d matrix,
2456    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2457    In general, for PDE problems in which most nonzeros are near the diagonal,
2458    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2459    or you will get TERRIBLE performance; see the users' manual chapter on
2460    matrices.
2461 
2462    Level: intermediate
2463 
2464 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2465 @*/
2466 
2467 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2468 {
2469   PetscErrorCode ierr;
2470   PetscMPIInt    size;
2471 
2472   PetscFunctionBegin;
2473   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2474   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2475   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2476   if (size > 1) {
2477     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2478     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2479   } else {
2480     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2481     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 
2487 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2488 {
2489   Mat            mat;
2490   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2491   PetscErrorCode ierr;
2492   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2493   PetscScalar    *array;
2494 
2495   PetscFunctionBegin;
2496   *newmat = 0;
2497 
2498   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2499   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2500   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2501   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2502   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2503 
2504   mat->factortype   = matin->factortype;
2505   mat->preallocated = PETSC_TRUE;
2506   mat->assembled    = PETSC_TRUE;
2507   mat->insertmode   = NOT_SET_VALUES;
2508 
2509   a      = (Mat_MPISBAIJ*)mat->data;
2510   a->bs2 = oldmat->bs2;
2511   a->mbs = oldmat->mbs;
2512   a->nbs = oldmat->nbs;
2513   a->Mbs = oldmat->Mbs;
2514   a->Nbs = oldmat->Nbs;
2515 
2516   a->size         = oldmat->size;
2517   a->rank         = oldmat->rank;
2518   a->donotstash   = oldmat->donotstash;
2519   a->roworiented  = oldmat->roworiented;
2520   a->rowindices   = 0;
2521   a->rowvalues    = 0;
2522   a->getrowactive = PETSC_FALSE;
2523   a->barray       = 0;
2524   a->rstartbs     = oldmat->rstartbs;
2525   a->rendbs       = oldmat->rendbs;
2526   a->cstartbs     = oldmat->cstartbs;
2527   a->cendbs       = oldmat->cendbs;
2528 
2529   /* hash table stuff */
2530   a->ht           = 0;
2531   a->hd           = 0;
2532   a->ht_size      = 0;
2533   a->ht_flag      = oldmat->ht_flag;
2534   a->ht_fact      = oldmat->ht_fact;
2535   a->ht_total_ct  = 0;
2536   a->ht_insert_ct = 0;
2537 
2538   ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+2);CHKERRQ(ierr);
2539   if (oldmat->colmap) {
2540 #if defined(PETSC_USE_CTABLE)
2541     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2542 #else
2543     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2544     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2545     ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
2546 #endif
2547   } else a->colmap = 0;
2548 
2549   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2550     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2551     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2552     ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
2553   } else a->garray = 0;
2554 
2555   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2556   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2557   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2558   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2559   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2560 
2561   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2562   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2563   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2564   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2565 
2566   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2567   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2568   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2569   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2570   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2571   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2572   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2573   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2574   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2575   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2576   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2577   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2578   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2579 
2580   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2581   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2582   a->sMvctx = oldmat->sMvctx;
2583   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2584 
2585   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2586   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2587   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2588   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2589   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2590   *newmat = mat;
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 /* Used for both MPIBAIJ and MPISBAIJ matrices */
2595 #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2596 
2597 PetscErrorCode MatLoad_MPISBAIJ(Mat mat,PetscViewer viewer)
2598 {
2599   PetscErrorCode ierr;
2600   PetscBool      isbinary;
2601 
2602   PetscFunctionBegin;
2603   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2604   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
2605   ierr = MatLoad_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 /*XXXXX@
2610    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2611 
2612    Input Parameters:
2613 .  mat  - the matrix
2614 .  fact - factor
2615 
2616    Not Collective on Mat, each process can have a different hash factor
2617 
2618    Level: advanced
2619 
2620   Notes:
2621    This can also be set by the command line option: -mat_use_hash_table fact
2622 
2623 .seealso: MatSetOption()
2624 @XXXXX*/
2625 
2626 
2627 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2628 {
2629   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2630   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2631   PetscReal      atmp;
2632   PetscReal      *work,*svalues,*rvalues;
2633   PetscErrorCode ierr;
2634   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2635   PetscMPIInt    rank,size;
2636   PetscInt       *rowners_bs,dest,count,source;
2637   PetscScalar    *va;
2638   MatScalar      *ba;
2639   MPI_Status     stat;
2640 
2641   PetscFunctionBegin;
2642   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2643   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2644   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2645 
2646   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2647   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2648 
2649   bs  = A->rmap->bs;
2650   mbs = a->mbs;
2651   Mbs = a->Mbs;
2652   ba  = b->a;
2653   bi  = b->i;
2654   bj  = b->j;
2655 
2656   /* find ownerships */
2657   rowners_bs = A->rmap->range;
2658 
2659   /* each proc creates an array to be distributed */
2660   ierr = PetscCalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2661 
2662   /* row_max for B */
2663   if (rank != size-1) {
2664     for (i=0; i<mbs; i++) {
2665       ncols = bi[1] - bi[0]; bi++;
2666       brow  = bs*i;
2667       for (j=0; j<ncols; j++) {
2668         bcol = bs*(*bj);
2669         for (kcol=0; kcol<bs; kcol++) {
2670           col  = bcol + kcol;                /* local col index */
2671           col += rowners_bs[rank+1];      /* global col index */
2672           for (krow=0; krow<bs; krow++) {
2673             atmp = PetscAbsScalar(*ba); ba++;
2674             row  = brow + krow;   /* local row index */
2675             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2676             if (work[col] < atmp) work[col] = atmp;
2677           }
2678         }
2679         bj++;
2680       }
2681     }
2682 
2683     /* send values to its owners */
2684     for (dest=rank+1; dest<size; dest++) {
2685       svalues = work + rowners_bs[dest];
2686       count   = rowners_bs[dest+1]-rowners_bs[dest];
2687       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2688     }
2689   }
2690 
2691   /* receive values */
2692   if (rank) {
2693     rvalues = work;
2694     count   = rowners_bs[rank+1]-rowners_bs[rank];
2695     for (source=0; source<rank; source++) {
2696       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2697       /* process values */
2698       for (i=0; i<count; i++) {
2699         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2700       }
2701     }
2702   }
2703 
2704   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2705   ierr = PetscFree(work);CHKERRQ(ierr);
2706   PetscFunctionReturn(0);
2707 }
2708 
2709 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2710 {
2711   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2712   PetscErrorCode    ierr;
2713   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2714   PetscScalar       *x,*ptr,*from;
2715   Vec               bb1;
2716   const PetscScalar *b;
2717 
2718   PetscFunctionBegin;
2719   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2720   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2721 
2722   if (flag == SOR_APPLY_UPPER) {
2723     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2724     PetscFunctionReturn(0);
2725   }
2726 
2727   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2728     if (flag & SOR_ZERO_INITIAL_GUESS) {
2729       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2730       its--;
2731     }
2732 
2733     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2734     while (its--) {
2735 
2736       /* lower triangular part: slvec0b = - B^T*xx */
2737       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2738 
2739       /* copy xx into slvec0a */
2740       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2741       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2742       ierr = PetscArraycpy(ptr,x,bs*mbs);CHKERRQ(ierr);
2743       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2744 
2745       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2746 
2747       /* copy bb into slvec1a */
2748       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2749       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2750       ierr = PetscArraycpy(ptr,b,bs*mbs);CHKERRQ(ierr);
2751       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2752 
2753       /* set slvec1b = 0 */
2754       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2755 
2756       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2757       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2758       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2759       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760 
2761       /* upper triangular part: bb1 = bb1 - B*x */
2762       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2763 
2764       /* local diagonal sweep */
2765       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2766     }
2767     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2768   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2769     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2770   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2771     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2772   } else if (flag & SOR_EISENSTAT) {
2773     Vec               xx1;
2774     PetscBool         hasop;
2775     const PetscScalar *diag;
2776     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2777     PetscInt          i,n;
2778 
2779     if (!mat->xx1) {
2780       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2781       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2782     }
2783     xx1 = mat->xx1;
2784     bb1 = mat->bb1;
2785 
2786     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2787 
2788     if (!mat->diag) {
2789       /* this is wrong for same matrix with new nonzero values */
2790       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2791       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2792     }
2793     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2794 
2795     if (hasop) {
2796       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2797       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2798     } else {
2799       /*
2800           These two lines are replaced by code that may be a bit faster for a good compiler
2801       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2802       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2803       */
2804       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2805       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2806       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2807       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2808       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2809       if (omega == 1.0) {
2810         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2811         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2812       } else {
2813         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2814         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2815       }
2816       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2817       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2818       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2819       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2820     }
2821 
2822     /* multiply off-diagonal portion of matrix */
2823     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2824     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2825     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2826     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2827     ierr = PetscArraycpy(from,x,bs*mbs);CHKERRQ(ierr);
2828     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2829     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2830     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2831     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2832     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2833 
2834     /* local sweep */
2835     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2836     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2837   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2842 {
2843   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2844   PetscErrorCode ierr;
2845   Vec            lvec1,bb1;
2846 
2847   PetscFunctionBegin;
2848   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2849   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2850 
2851   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2852     if (flag & SOR_ZERO_INITIAL_GUESS) {
2853       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2854       its--;
2855     }
2856 
2857     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2858     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2859     while (its--) {
2860       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2861 
2862       /* lower diagonal part: bb1 = bb - B^T*xx */
2863       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2864       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2865 
2866       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2867       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2868       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2869 
2870       /* upper diagonal part: bb1 = bb1 - B*x */
2871       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2872       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2873 
2874       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2875 
2876       /* diagonal sweep */
2877       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2878     }
2879     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2880     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2881   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 /*@
2886      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2887          CSR format the local rows.
2888 
2889    Collective
2890 
2891    Input Parameters:
2892 +  comm - MPI communicator
2893 .  bs - the block size, only a block size of 1 is supported
2894 .  m - number of local rows (Cannot be PETSC_DECIDE)
2895 .  n - This value should be the same as the local size used in creating the
2896        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2897        calculated if N is given) For square matrices n is almost always m.
2898 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2899 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2900 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that row block row of the matrix
2901 .   j - column indices
2902 -   a - matrix values
2903 
2904    Output Parameter:
2905 .   mat - the matrix
2906 
2907    Level: intermediate
2908 
2909    Notes:
2910        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2911      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2912      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2913 
2914        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2915 
2916 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2917           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2918 @*/
2919 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2920 {
2921   PetscErrorCode ierr;
2922 
2923 
2924   PetscFunctionBegin;
2925   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2926   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2927   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2928   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2929   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2930   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 
2935 /*@C
2936    MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in SBAIJ format using the given nonzero structure and (optional) numerical values
2937 
2938    Collective
2939 
2940    Input Parameters:
2941 +  B - the matrix
2942 .  bs - the block size
2943 .  i - the indices into j for the start of each local row (starts with zero)
2944 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2945 -  v - optional values in the matrix
2946 
2947    Level: advanced
2948 
2949    Notes:
2950    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries
2951    and usually the numerical values as well
2952 
2953    Any entries below the diagonal are ignored
2954 
2955 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2956 @*/
2957 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2958 {
2959   PetscErrorCode ierr;
2960 
2961   PetscFunctionBegin;
2962   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2963   PetscFunctionReturn(0);
2964 }
2965 
2966 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2967 {
2968   PetscErrorCode ierr;
2969   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2970   PetscInt       *indx;
2971   PetscScalar    *values;
2972 
2973   PetscFunctionBegin;
2974   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2975   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2976     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2977     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
2978     PetscInt       *bindx,rmax=a->rmax,j;
2979     PetscMPIInt    rank,size;
2980 
2981     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2982     mbs = m/bs; Nbs = N/cbs;
2983     if (n == PETSC_DECIDE) {
2984       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
2985     }
2986     nbs = n/cbs;
2987 
2988     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2989     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
2990 
2991     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2992     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
2993     if (rank == size-1) {
2994       /* Check sum(nbs) = Nbs */
2995       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
2996     }
2997 
2998     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
2999     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3000     for (i=0; i<mbs; i++) {
3001       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3002       nnz  = nnz/bs;
3003       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3004       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3005       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3006     }
3007     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3008     ierr = PetscFree(bindx);CHKERRQ(ierr);
3009 
3010     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3011     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3012     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3013     ierr = MatSetType(*outmat,MATSBAIJ);CHKERRQ(ierr);
3014     ierr = MatSeqSBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3015     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3016     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3017   }
3018 
3019   /* numeric phase */
3020   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3021   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3022 
3023   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3024   for (i=0; i<m; i++) {
3025     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3026     Ii   = i + rstart;
3027     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3028     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3029   }
3030   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3031   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3032   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3033   PetscFunctionReturn(0);
3034 }
3035