xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b2db74096e93f2ad99d78bc7b7c4aa33cbb6afe8)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
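/*
   The two macros below insert or add a single scalar entry into the local diagonal
   block (A, SeqSBAIJ) or off-diagonal block (B, SeqBAIJ): they locate the block
   column with a short binary search followed by a linear scan and, when the block is
   not yet present and new nonzeros are allowed, shift the later blocks of the row up
   to make room for it.
*/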
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by the user is ignored.
117 */
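/*
   A minimal caller-side sketch (editorial illustration, not part of this file): to
   avoid the error below, either pass only entries with i <= j or ask the matrix to
   silently drop the lower-triangular ones:

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValue(mat,i,j,v,ADD_VALUES);CHKERRQ(ierr);

   after which any (i,j) with i > j is silently dropped instead of raising an error.
*/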
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some variables required by the macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
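            /* colmap maps a global block column to bs*(its local block column in B)+1; a lookup of 0 (hence col < 0 below) means the block column is not yet present */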
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
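/*
   Typical driver-side use of the routine above (editorial sketch; the matrix C and
   the values chosen are illustrative). Only the upper triangle (row <= col) is
   passed; entries targeting off-process rows are stashed here and communicated
   during assembly:

     PetscInt    row = 0, col = 2;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(C,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/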
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero block at global block row/column (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous in v and a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
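/*
   Blocked-insertion sketch (editorial illustration): the indices are block indices
   and v holds one bs x bs block per (row,column) pair, row-oriented by default, so
   with bs == 2 the block [1 2; 3 4] is passed as {1,2,3,4}.

     PetscInt    brow = 0, bcol = 1;
     PetscScalar vblock[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValuesBlocked(C,1,&brow,1,&bcol,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/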
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
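/*
   Retrieval sketch (editorial illustration): as the error above indicates, only rows
   owned by the calling process may be queried, so a caller restricts the requested
   rows to its ownership range.

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     ierr = MatGetOwnershipRange(C,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(C,1,&row,1,&col,&val);CHKERRQ(ierr);
*/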
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
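      /* B stores only the upper off-diagonal coupling of the symmetric matrix, so its squared Frobenius norm is counted twice */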
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
637       }
638       /* Bmat */
639       v = bmat->a; jj = bmat->j;
640       for (brow=0; brow<mbs; brow++) {
641         grow = bs*(rstart + brow);
642         nz = bmat->i[brow+1] - bmat->i[brow];
643         for (bcol=0; bcol<nz; bcol++) {
644           gcol = bs*garray[*jj]; jj++;
645           for (col=0; col<bs; col++) {
646             for (row=0; row<bs; row++) {
647               vabs            = PetscAbsScalar(*v); v++;
648               rsum[gcol+col] += vabs;
649               rsum[grow+row] += vabs;
650             }
651           }
652         }
653         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
654       }
655       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
656       *norm = 0.0;
657       for (col=0; col<mat->cmap->N; col++) {
658         if (rsum2[col] > *norm) *norm = rsum2[col];
659       }
660       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
661     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
662   }
663   PetscFunctionReturn(0);
664 }
665 
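/*
   Assembly follows the usual PETSc two-phase pattern: MatAssemblyBegin_MPISBAIJ starts
   the communication of the stashed off-process entries (both the scalar and the block
   stash), and MatAssemblyEnd_MPISBAIJ receives them, inserts them through
   MatSetValues_MPISBAIJ/MatSetValuesBlocked_MPISBAIJ, assembles the local A and B
   parts, and sets up the scatters needed by MatMult.
*/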
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode ierr;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
678   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
688 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
689 {
690   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
691   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
692   PetscErrorCode ierr;
693   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
694   PetscInt       *row,*col;
695   PetscBool      other_disassembled;
696   PetscMPIInt    n;
697   PetscBool      r1,r2,r3;
698   MatScalar      *val;
699 
700   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
701   PetscFunctionBegin;
702   if (!baij->donotstash &&  !mat->nooffprocentries) {
703     while (1) {
704       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
705       if (!flg) break;
706 
707       for (i=0; i<n;) {
708         /* Now identify the consecutive vals belonging to the same row */
709         for (j=i,rstart=row[j]; j<n; j++) {
710           if (row[j] != rstart) break;
711         }
712         if (j < n) ncols = j-i;
713         else       ncols = n-i;
714         /* Now assemble all these values with a single function call */
715         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
716         i    = j;
717       }
718     }
719     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
720     /* Now process the block-stash. Since the values are stashed column-oriented,
721        clear the roworiented flags, and after MatSetValuesBlocked()
722        restore the original flags */
723     r1 = baij->roworiented;
724     r2 = a->roworiented;
725     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
726 
727     baij->roworiented = PETSC_FALSE;
728     a->roworiented    = PETSC_FALSE;
729 
730     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
731     while (1) {
732       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
733       if (!flg) break;
734 
735       for (i=0; i<n;) {
736         /* Now identify the consecutive vals belonging to the same row */
737         for (j=i,rstart=row[j]; j<n; j++) {
738           if (row[j] != rstart) break;
739         }
740         if (j < n) ncols = j-i;
741         else       ncols = n-i;
742         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
743         i    = j;
744       }
745     }
746     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
747 
748     baij->roworiented = r1;
749     a->roworiented    = r2;
750 
751     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
752   }
753 
754   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
755   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
756 
757   /* determine if any processor has disassembled; if so we must
758      also disassemble ourselves, in order that we may reassemble. */
759   /*
760      if the nonzero structure of the submatrix B cannot change then we know that
761      no processor disassembled, thus we can skip this stuff
762   */
763   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
764     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
765     if (mat->was_assembled && !other_disassembled) {
766       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
767     }
768   }
769 
770   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
771     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
772   }
773   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
774   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
775 
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777 
778   baij->rowvalues = 0;
779 
780   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
781   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
782     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
783     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
790 #include <petscdraw.h>
791 #undef __FUNCT__
792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
794 {
795   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
796   PetscErrorCode    ierr;
797   PetscInt          bs   = mat->rmap->bs;
798   PetscMPIInt       rank = baij->rank;
799   PetscBool         iascii,isdraw;
800   PetscViewer       sviewer;
801   PetscViewerFormat format;
802 
803   PetscFunctionBegin;
804   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
805   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
806   if (iascii) {
807     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
808     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
809       MatInfo info;
810       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
811       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
812       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
818       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
819       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
820       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
821       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
822       PetscFunctionReturn(0);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
827       PetscFunctionReturn(0);
828     }
829   }
830 
831   if (isdraw) {
832     PetscDraw draw;
833     PetscBool isnull;
834     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
835     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
836     if (isnull) PetscFunctionReturn(0);
837   }
838 
839   {
840     /* assemble the entire matrix onto first processor. */
841     Mat          A;
842     Mat_SeqSBAIJ *Aloc;
843     Mat_SeqBAIJ  *Bloc;
844     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
845     MatScalar    *a;
846     const char   *matname;
847 
848     /* Should this be the same type as mat? */
849     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
850     if (!rank) {
851       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
852     } else {
853       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
854     }
855     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
856     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
857     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
858     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
859 
860     /* copy over the A part */
861     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
862     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
863     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
864 
865     for (i=0; i<mbs; i++) {
866       rvals[0] = bs*(baij->rstartbs + i);
867       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
868       for (j=ai[i]; j<ai[i+1]; j++) {
869         col = (baij->cstartbs+aj[j])*bs;
870         for (k=0; k<bs; k++) {
871           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
872           col++;
873           a += bs;
874         }
875       }
876     }
877     /* copy over the B part */
878     Bloc = (Mat_SeqBAIJ*)baij->B->data;
879     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
880     for (i=0; i<mbs; i++) {
881 
882       rvals[0] = bs*(baij->rstartbs + i);
883       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
884       for (j=ai[i]; j<ai[i+1]; j++) {
885         col = baij->garray[aj[j]]*bs;
886         for (k=0; k<bs; k++) {
887           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
888           col++;
889           a += bs;
890         }
891       }
892     }
893     ierr = PetscFree(rvals);CHKERRQ(ierr);
894     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896     /*
897        Everyone has to call to draw the matrix since the graphics waits are
898        synchronized across all processors that share the PetscDraw object
899     */
900     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
901     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
902     if (!rank) {
903       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
904       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
905     }
906     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
907     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
908     ierr = MatDestroy(&A);CHKERRQ(ierr);
909   }
910   PetscFunctionReturn(0);
911 }
912 
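/*
   The binary viewer below writes the standard PETSc binary matrix format: a 4-entry
   header (MAT_FILE_CLASSID, M, N, global number of nonzeros), then the per-row
   nonzero counts, then the column indices of every row, then the numerical values,
   with process 0 collecting each piece from the other ranks under flow control.
*/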
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPISBAIJ_Binary"
915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
916 {
917   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
918   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
919   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
920   PetscErrorCode ierr;
921   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
922   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
923   int            fd;
924   PetscScalar    *column_values;
925   FILE           *file;
926   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
927   PetscInt       message_count,flowcontrolcount;
928 
929   PetscFunctionBegin;
930   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
931   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
932   nz   = bs2*(A->nz + B->nz);
933   rlen = mat->rmap->n;
934   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935   if (!rank) {
936     header[0] = MAT_FILE_CLASSID;
937     header[1] = mat->rmap->N;
938     header[2] = mat->cmap->N;
939 
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
941     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
942     /* get largest number of rows any processor has */
943     range = mat->rmap->range;
944     for (i=1; i<size; i++) {
945       rlen = PetscMax(rlen,range[i+1] - range[i]);
946     }
947   } else {
948     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
949   }
950 
951   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
952   /* compute the number of nonzero blocks in each block row */
953   for (i=0; i<a->mbs; i++) {
954     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
955   }
956   /* store the row lengths to the file */
957   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
958   if (!rank) {
959     MPI_Status status;
960     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
961     rlen = (range[1] - range[0])/bs;
962     for (i=0; i<rlen; i++) {
963       for (j=0; j<bs; j++) {
964         row_lens[i*bs+j] = bs*crow_lens[i];
965       }
966     }
967     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     for (i=1; i<size; i++) {
969       rlen = (range[i+1] - range[i])/bs;
970       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
971       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
972       for (k=0; k<rlen; k++) {
973         for (j=0; j<bs; j++) {
974           row_lens[k*bs+j] = bs*crow_lens[k];
975         }
976       }
977       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
978     }
979     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
980     ierr = PetscFree(row_lens);CHKERRQ(ierr);
981   } else {
982     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
983     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
984     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
985   }
986   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
987 
988   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
989      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
990      the communication needed for the nonzero values  */
991   nzmax = nz; /* space the largest processor needs */
992   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
993   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
994   cnt   = 0;
995   for (i=0; i<a->mbs; i++) {
996     pcnt = cnt;
997     for (j=B->i[i]; j<B->i[i+1]; j++) {
998       if ((col = garray[B->j[j]]) > cstart) break;
999       for (l=0; l<bs; l++) {
1000         column_indices[cnt++] = bs*col+l;
1001       }
1002     }
1003     for (k=A->i[i]; k<A->i[i+1]; k++) {
1004       for (l=0; l<bs; l++) {
1005         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1006       }
1007     }
1008     for (; j<B->i[i+1]; j++) {
1009       for (l=0; l<bs; l++) {
1010         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1011       }
1012     }
1013     len = cnt - pcnt;
1014     for (k=1; k<bs; k++) {
1015       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1016       cnt += len;
1017     }
1018   }
1019   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1020 
1021   /* store the columns to the file */
1022   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1023   if (!rank) {
1024     MPI_Status status;
1025     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1026     for (i=1; i<size; i++) {
1027       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1028       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1029       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1030       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1031     }
1032     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1033   } else {
1034     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1035     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1036     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1037     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1038   }
1039   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1040 
1041   /* load up the numerical values */
1042   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1043   cnt  = 0;
1044   for (i=0; i<a->mbs; i++) {
1045     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1046     for (j=B->i[i]; j<B->i[i+1]; j++) {
1047       if (garray[B->j[j]] > cstart) break;
1048       for (l=0; l<bs; l++) {
1049         for (ll=0; ll<bs; ll++) {
1050           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1051         }
1052       }
1053       cnt += bs;
1054     }
1055     for (k=A->i[i]; k<A->i[i+1]; k++) {
1056       for (l=0; l<bs; l++) {
1057         for (ll=0; ll<bs; ll++) {
1058           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1059         }
1060       }
1061       cnt += bs;
1062     }
1063     for (; j<B->i[i+1]; j++) {
1064       for (l=0; l<bs; l++) {
1065         for (ll=0; ll<bs; ll++) {
1066           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1067         }
1068       }
1069       cnt += bs;
1070     }
1071     cnt += (bs-1)*rlen;
1072   }
1073   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1074 
1075   /* store the column values to the file */
1076   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1077   if (!rank) {
1078     MPI_Status status;
1079     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1080     for (i=1; i<size; i++) {
1081       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1082       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1083       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1084       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1085     }
1086     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1087   } else {
1088     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1089     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1090     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1091     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1092   }
1093   ierr = PetscFree(column_values);CHKERRQ(ierr);
1094 
1095   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1096   if (file) {
1097     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1098   }
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatView_MPISBAIJ"
1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1105 {
1106   PetscErrorCode ierr;
1107   PetscBool      iascii,isdraw,issocket,isbinary;
1108 
1109   PetscFunctionBegin;
1110   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1111   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1112   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1113   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1114   if (iascii || isdraw || issocket) {
1115     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1116   } else if (isbinary) {
1117     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatDestroy_MPISBAIJ"
1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1125 {
1126   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130 #if defined(PETSC_USE_LOG)
1131   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1132 #endif
1133   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1134   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1135   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1136   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1137 #if defined(PETSC_USE_CTABLE)
1138   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1139 #else
1140   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1141 #endif
1142   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1143   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1144   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1145   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1146   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1147   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1148   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1149   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1150   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1151   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1152   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1153   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1154   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1155   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1156   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1157 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1158   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1159 #endif
1160   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1161   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1162   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
1171 #if defined(PETSC_HAVE_ELEMENTAL)
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1173 #endif
1174   PetscFunctionReturn(0);
1175 }
1176 
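/*
   The MatMult routines below exploit symmetry with a single scatter: the local result
   of the transposed (subdiagonal) product B^T x is packed together with x into slvec0;
   one forward scatter of slvec0 into slvec1 both gathers the ghost values of x
   (slvec1b) and accumulates the remote B^T x contributions into the owned part
   (slvec1a, which already holds A x); a final B*slvec1b + slvec1a then produces y.
*/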
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
1179 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1180 {
1181   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1182   PetscErrorCode    ierr;
1183   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1184   PetscScalar       *from;
1185   const PetscScalar *x;
1186 
1187   PetscFunctionBegin;
1188   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1189   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1190 
1191   /* diagonal part */
1192   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1193   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1194 
1195   /* subdiagonal part */
1196   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1197 
1198   /* copy x into the vec slvec0 */
1199   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1200   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1201 
1202   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1203   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1204   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1205 
1206   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1208   /* superdiagonal part */
1209   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatMult_MPISBAIJ"
1215 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1216 {
1217   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1218   PetscErrorCode    ierr;
1219   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1220   PetscScalar       *from;
1221   const PetscScalar *x;
1222 
1223   PetscFunctionBegin;
1224   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1225   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1226 
1227   /* diagonal part */
1228   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1229   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1230 
1231   /* subdiagonal part */
1232   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1233 
1234   /* copy x into the vec slvec0 */
1235   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1236   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1237 
1238   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1239   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1240   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1241 
1242   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1244   /* superdiagonal part */
1245   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1251 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1252 {
1253   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255   PetscInt       nt;
1256 
1257   PetscFunctionBegin;
1258   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1259   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1260 
1261   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1262   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1263 
1264   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   /* do diagonal part */
1266   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1267   /* do superdiagonal part */
1268   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1269   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1270   /* do subdiagonal part */
1271   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1272   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1273   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1279 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280 {
1281   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1282   PetscErrorCode    ierr;
1283   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1284   PetscScalar       *from,zero=0.0;
1285   const PetscScalar *x;
1286 
1287   PetscFunctionBegin;
1288   /*
1289   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1290   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1291   */
1292   /* diagonal part */
1293   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1294   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1295 
1296   /* subdiagonal part */
1297   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1298 
1299   /* copy x into the vec slvec0 */
1300   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1301   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1302   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1303   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1304 
1305   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1307   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1308 
1309   /* superdiagonal part */
1310   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1316 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1317 {
1318   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1319   PetscErrorCode ierr;
1320 
1321   PetscFunctionBegin;
1322   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   /* do diagonal part */
1324   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1325   /* do superdiagonal part */
1326   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1328 
1329   /* do subdiagonal part */
1330   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1331   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1332   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 /*
1337   This only works correctly for square matrices where the subblock A->A is the
1338    diagonal block
1339 */
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1342 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1343 {
1344   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346 
1347   PetscFunctionBegin;
1348   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1349   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatScale_MPISBAIJ"
1355 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1356 {
1357   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1362   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1368 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1369 {
1370   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1371   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1372   PetscErrorCode ierr;
1373   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1374   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1375   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1376 
1377   PetscFunctionBegin;
1378   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1379   mat->getrowactive = PETSC_TRUE;
1380 
1381   if (!mat->rowvalues && (idx || v)) {
1382     /*
1383         allocate enough space to hold information from the longest row.
1384     */
1385     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1386     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1387     PetscInt     max = 1,mbs = mat->mbs,tmp;
1388     for (i=0; i<mbs; i++) {
1389       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1390       if (max < tmp) max = tmp;
1391     }
1392     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1393   }
1394 
1395   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1396   lrow = row - brstart;  /* local row index */
1397 
1398   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1399   if (!v)   {pvA = 0; pvB = 0;}
1400   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1401   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1402   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1403   nztot = nzA + nzB;
1404 
1405   cmap = mat->garray;
1406   if (v  || idx) {
1407     if (nztot) {
1408       /* Sort by increasing column numbers, assuming A and B already sorted */
1409       PetscInt imark = -1;
1410       if (v) {
1411         *v = v_p = mat->rowvalues;
1412         for (i=0; i<nzB; i++) {
1413           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1414           else break;
1415         }
1416         imark = i;
1417         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1418         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1419       }
1420       if (idx) {
1421         *idx = idx_p = mat->rowindices;
1422         if (imark > -1) {
1423           for (i=0; i<imark; i++) {
1424             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1425           }
1426         } else {
1427           for (i=0; i<nzB; i++) {
1428             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1429             else break;
1430           }
1431           imark = i;
1432         }
1433         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1434     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; \
1435       }
1436     } else {
1437       if (idx) *idx = 0;
1438       if (v)   *v   = 0;
1439     }
1440   }
1441   *nz  = nztot;
1442   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1443   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1449 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1450 {
1451   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1452 
1453   PetscFunctionBegin;
1454   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1455   baij->getrowactive = PETSC_FALSE;
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1461 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1462 {
1463   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1464   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1465 
1466   PetscFunctionBegin;
1467   aA->getrow_utriangular = PETSC_TRUE;
1468   PetscFunctionReturn(0);
1469 }
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1472 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1473 {
1474   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1475   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1476 
1477   PetscFunctionBegin;
1478   aA->getrow_utriangular = PETSC_FALSE;
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1484 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1485 {
1486   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1491   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1497 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1498 {
1499   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1500   PetscErrorCode ierr;
1501 
1502   PetscFunctionBegin;
1503   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1504   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
1507 
1508 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1509    Input: isrow       - distributed (parallel),
1510           iscol_local - locally owned (seq)
1511 */
1512 #undef __FUNCT__
1513 #define __FUNCT__ "ISEqual_private"
1514 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1515 {
1516   PetscErrorCode ierr;
1517   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1518   const PetscInt *ptr1,*ptr2;
1519 
1520   PetscFunctionBegin;
1521   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1522   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1523   if (sz1 > sz2) {
1524     *flg = PETSC_FALSE;
1525     PetscFunctionReturn(0);
1526   }
1527 
1528   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1529   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1530 
1531   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1532   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1533   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1534   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1535   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1536   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1537 
1538   nmatch=0;
1539   k     = 0;
1540   for (i=0; i<sz1; i++){
1541     for (j=k; j<sz2; j++){
1542       if (a1[i] == a2[j]) {
1543         k = j; nmatch++;
1544         break;
1545       }
1546     }
1547   }
1548   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1549   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1550   ierr = PetscFree(a1);CHKERRQ(ierr);
1551   ierr = PetscFree(a2);CHKERRQ(ierr);
1552   if (nmatch < sz1) {
1553     *flg = PETSC_FALSE;
1554   } else {
1555     *flg = PETSC_TRUE;
1556   }
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1562 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1563 {
1564   PetscErrorCode ierr;
1565   IS             iscol_local;
1566   PetscInt       csize;
1567   PetscBool      isequal;
1568 
1569   PetscFunctionBegin;
1570   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1571   if (call == MAT_REUSE_MATRIX) {
1572     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1573     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1574   } else {
1575     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1576     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1577     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1578   }
1579 
1580   /* now call MatGetSubMatrix_MPIBAIJ_Private() */
1581   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1582   if (call == MAT_INITIAL_MATRIX) {
1583     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1584     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1585   }
1586   PetscFunctionReturn(0);
1587 }
1588 
1589 #undef __FUNCT__
1590 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1591 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1592 {
1593   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1594   PetscErrorCode ierr;
1595 
1596   PetscFunctionBegin;
1597   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1598   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1604 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1605 {
1606   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1607   Mat            A  = a->A,B = a->B;
1608   PetscErrorCode ierr;
1609   PetscReal      isend[5],irecv[5];
1610 
1611   PetscFunctionBegin;
1612   info->block_size = (PetscReal)matin->rmap->bs;
1613 
1614   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1615 
1616   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1617   isend[3] = info->memory;  isend[4] = info->mallocs;
1618 
1619   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1620 
1621   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1622   isend[3] += info->memory;  isend[4] += info->mallocs;
1623   if (flag == MAT_LOCAL) {
1624     info->nz_used      = isend[0];
1625     info->nz_allocated = isend[1];
1626     info->nz_unneeded  = isend[2];
1627     info->memory       = isend[3];
1628     info->mallocs      = isend[4];
1629   } else if (flag == MAT_GLOBAL_MAX) {
1630     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1631 
1632     info->nz_used      = irecv[0];
1633     info->nz_allocated = irecv[1];
1634     info->nz_unneeded  = irecv[2];
1635     info->memory       = irecv[3];
1636     info->mallocs      = irecv[4];
1637   } else if (flag == MAT_GLOBAL_SUM) {
1638     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1639 
1640     info->nz_used      = irecv[0];
1641     info->nz_allocated = irecv[1];
1642     info->nz_unneeded  = irecv[2];
1643     info->memory       = irecv[3];
1644     info->mallocs      = irecv[4];
1645   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1646   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1647   info->fill_ratio_needed = 0;
1648   info->factor_mallocs    = 0;
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1654 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1655 {
1656   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1657   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1658   PetscErrorCode ierr;
1659 
1660   PetscFunctionBegin;
1661   switch (op) {
1662   case MAT_NEW_NONZERO_LOCATIONS:
1663   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1664   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1665   case MAT_KEEP_NONZERO_PATTERN:
1666   case MAT_NEW_NONZERO_LOCATION_ERR:
1667     MatCheckPreallocated(A,1);
1668     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1669     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1670     break;
1671   case MAT_ROW_ORIENTED:
1672     MatCheckPreallocated(A,1);
1673     a->roworiented = flg;
1674 
1675     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1676     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1677     break;
1678   case MAT_NEW_DIAGONALS:
1679     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1680     break;
1681   case MAT_IGNORE_OFF_PROC_ENTRIES:
1682     a->donotstash = flg;
1683     break;
1684   case MAT_USE_HASH_TABLE:
1685     a->ht_flag = flg;
1686     break;
1687   case MAT_HERMITIAN:
1688     MatCheckPreallocated(A,1);
1689     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1690     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1691 
1692     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1693     break;
1694   case MAT_SPD:
1695     A->spd_set = PETSC_TRUE;
1696     A->spd     = flg;
1697     if (flg) {
1698       A->symmetric                  = PETSC_TRUE;
1699       A->structurally_symmetric     = PETSC_TRUE;
1700       A->symmetric_set              = PETSC_TRUE;
1701       A->structurally_symmetric_set = PETSC_TRUE;
1702     }
1703     break;
1704   case MAT_SYMMETRIC:
1705     MatCheckPreallocated(A,1);
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     break;
1708   case MAT_STRUCTURALLY_SYMMETRIC:
1709     MatCheckPreallocated(A,1);
1710     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1711     break;
1712   case MAT_SYMMETRY_ETERNAL:
1713     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1714     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1715     break;
1716   case MAT_IGNORE_LOWER_TRIANGULAR:
1717     aA->ignore_ltriangular = flg;
1718     break;
1719   case MAT_ERROR_LOWER_TRIANGULAR:
1720     aA->ignore_ltriangular = flg;
1721     break;
1722   case MAT_GETROW_UPPERTRIANGULAR:
1723     aA->getrow_utriangular = flg;
1724     break;
1725   default:
1726     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1733 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1734 {
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1739     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1740   }
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1746 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1747 {
1748   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1749   Mat            a     = baij->A, b=baij->B;
1750   PetscErrorCode ierr;
1751   PetscInt       nv,m,n;
1752   PetscBool      flg;
1753 
1754   PetscFunctionBegin;
1755   if (ll != rr) {
1756     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1757     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1758   }
1759   if (!ll) PetscFunctionReturn(0);
1760 
1761   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1762   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1763 
1764   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1765   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1766 
1767   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1768 
1769   /* left diagonalscale the off-diagonal part */
1770   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1771 
1772   /* scale the diagonal part */
1773   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1774 
1775   /* right diagonalscale the off-diagonal part */
1776   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1777   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1778   PetscFunctionReturn(0);
1779 }
1780 
1781 #undef __FUNCT__
1782 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1783 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1784 {
1785   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1786   PetscErrorCode ierr;
1787 
1788   PetscFunctionBegin;
1789   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
1793 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatEqual_MPISBAIJ"
1797 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1798 {
1799   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1800   Mat            a,b,c,d;
1801   PetscBool      flg;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   a = matA->A; b = matA->B;
1806   c = matB->A; d = matB->B;
1807 
1808   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1809   if (flg) {
1810     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1811   }
1812   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatCopy_MPISBAIJ"
1818 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1819 {
1820   PetscErrorCode ierr;
1821   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1822   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1823 
1824   PetscFunctionBegin;
1825   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1826   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1827     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1828     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1829     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1830   } else {
1831     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1832     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1833   }
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1839 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1840 {
1841   PetscErrorCode ierr;
1842 
1843   PetscFunctionBegin;
1844   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 #undef __FUNCT__
1849 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1850 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1851 {
1852   PetscErrorCode ierr;
1853   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1854   PetscBLASInt   bnz,one=1;
1855   Mat_SeqSBAIJ   *xa,*ya;
1856   Mat_SeqBAIJ    *xb,*yb;
1857 
1858   PetscFunctionBegin;
1859   if (str == SAME_NONZERO_PATTERN) {
1860     PetscScalar alpha = a;
1861     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1862     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1863     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1864     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1865     xb   = (Mat_SeqBAIJ*)xx->B->data;
1866     yb   = (Mat_SeqBAIJ*)yy->B->data;
1867     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1868     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1869     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1870   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1871     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1872     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1873     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1874   } else {
1875     Mat      B;
1876     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1877     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1878     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1879     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1880     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1881     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1882     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1883     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1884     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1885     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1886     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1887     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1888     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1889     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1890     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1891     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1892     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1893     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1894     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1895     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1902 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1903 {
1904   PetscErrorCode ierr;
1905   PetscInt       i;
1906   PetscBool      flg;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1910   for (i=0; i<n; i++) {
1911     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1912     if (!flg) {
1913       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1914     }
1915   }
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatShift_MPISBAIJ"
1921 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1922 {
1923   PetscErrorCode ierr;
1924   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1925   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1926 
1927   PetscFunctionBegin;
1928   if (!Y->preallocated) {
1929     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1930   } else if (!aij->nz) {
1931     PetscInt nonew = aij->nonew;
1932     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1933     aij->nonew = nonew;
1934   }
1935   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1941 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1942 {
1943   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1944   PetscErrorCode ierr;
1945 
1946   PetscFunctionBegin;
1947   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1948   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1949   if (d) {
1950     PetscInt rstart;
1951     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1952     *d += rstart/A->rmap->bs;
1953 
1954   }
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 
1959 /* -------------------------------------------------------------------*/
1960 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1961                                        MatGetRow_MPISBAIJ,
1962                                        MatRestoreRow_MPISBAIJ,
1963                                        MatMult_MPISBAIJ,
1964                                /*  4*/ MatMultAdd_MPISBAIJ,
1965                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1966                                        MatMultAdd_MPISBAIJ,
1967                                        0,
1968                                        0,
1969                                        0,
1970                                /* 10*/ 0,
1971                                        0,
1972                                        0,
1973                                        MatSOR_MPISBAIJ,
1974                                        MatTranspose_MPISBAIJ,
1975                                /* 15*/ MatGetInfo_MPISBAIJ,
1976                                        MatEqual_MPISBAIJ,
1977                                        MatGetDiagonal_MPISBAIJ,
1978                                        MatDiagonalScale_MPISBAIJ,
1979                                        MatNorm_MPISBAIJ,
1980                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1981                                        MatAssemblyEnd_MPISBAIJ,
1982                                        MatSetOption_MPISBAIJ,
1983                                        MatZeroEntries_MPISBAIJ,
1984                                /* 24*/ 0,
1985                                        0,
1986                                        0,
1987                                        0,
1988                                        0,
1989                                /* 29*/ MatSetUp_MPISBAIJ,
1990                                        0,
1991                                        0,
1992                                        0,
1993                                        0,
1994                                /* 34*/ MatDuplicate_MPISBAIJ,
1995                                        0,
1996                                        0,
1997                                        0,
1998                                        0,
1999                                /* 39*/ MatAXPY_MPISBAIJ,
2000                                        MatGetSubMatrices_MPISBAIJ,
2001                                        MatIncreaseOverlap_MPISBAIJ,
2002                                        MatGetValues_MPISBAIJ,
2003                                        MatCopy_MPISBAIJ,
2004                                /* 44*/ 0,
2005                                        MatScale_MPISBAIJ,
2006                                        MatShift_MPISBAIJ,
2007                                        0,
2008                                        0,
2009                                /* 49*/ 0,
2010                                        0,
2011                                        0,
2012                                        0,
2013                                        0,
2014                                /* 54*/ 0,
2015                                        0,
2016                                        MatSetUnfactored_MPISBAIJ,
2017                                        0,
2018                                        MatSetValuesBlocked_MPISBAIJ,
2019                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /* 64*/ 0,
2025                                        0,
2026                                        0,
2027                                        0,
2028                                        0,
2029                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2030                                        0,
2031                                        0,
2032                                        0,
2033                                        0,
2034                                /* 74*/ 0,
2035                                        0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                /* 79*/ 0,
2040                                        0,
2041                                        0,
2042                                        0,
2043                                        MatLoad_MPISBAIJ,
2044                                /* 84*/ 0,
2045                                        0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                /* 89*/ 0,
2050                                        0,
2051                                        0,
2052                                        0,
2053                                        0,
2054                                /* 94*/ 0,
2055                                        0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                /* 99*/ 0,
2060                                        0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                /*104*/ 0,
2065                                        MatRealPart_MPISBAIJ,
2066                                        MatImaginaryPart_MPISBAIJ,
2067                                        MatGetRowUpperTriangular_MPISBAIJ,
2068                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2069                                /*109*/ 0,
2070                                        0,
2071                                        0,
2072                                        0,
2073                                        MatMissingDiagonal_MPISBAIJ,
2074                                /*114*/ 0,
2075                                        0,
2076                                        0,
2077                                        0,
2078                                        0,
2079                                /*119*/ 0,
2080                                        0,
2081                                        0,
2082                                        0,
2083                                        0,
2084                                /*124*/ 0,
2085                                        0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                /*129*/ 0,
2090                                        0,
2091                                        0,
2092                                        0,
2093                                        0,
2094                                /*134*/ 0,
2095                                        0,
2096                                        0,
2097                                        0,
2098                                        0,
2099                                /*139*/ 0,
2100                                        0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2105 };
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
2109 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
2110 {
2111   PetscFunctionBegin;
2112   *a = ((Mat_MPISBAIJ*)A->data)->A;
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
2118 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2119 {
2120   Mat_MPISBAIJ   *b;
2121   PetscErrorCode ierr;
2122   PetscInt       i,mbs,Mbs;
2123 
2124   PetscFunctionBegin;
2125   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2126   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2127   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2128   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2129 
2130   b   = (Mat_MPISBAIJ*)B->data;
2131   mbs = B->rmap->n/bs;
2132   Mbs = B->rmap->N/bs;
2133   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
2134 
2135   B->rmap->bs = bs;
2136   b->bs2      = bs*bs;
2137   b->mbs      = mbs;
2138   b->Mbs      = Mbs;
2139   b->nbs      = B->cmap->n/bs;
2140   b->Nbs      = B->cmap->N/bs;
2141 
2142   for (i=0; i<=b->size; i++) {
2143     b->rangebs[i] = B->rmap->range[i]/bs;
2144   }
2145   b->rstartbs = B->rmap->rstart/bs;
2146   b->rendbs   = B->rmap->rend/bs;
2147 
2148   b->cstartbs = B->cmap->rstart/bs;
2149   b->cendbs   = B->cmap->rend/bs;
2150 
2151   if (!B->preallocated) {
2152     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2153     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2154     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2155     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2156     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2157     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2158     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2159     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2160     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2161   }
2162 
2163   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2164   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2165 
2166   B->preallocated = PETSC_TRUE;
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
2172 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2173 {
2174   PetscInt       m,rstart,cstart,cend;
2175   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2176   const PetscInt *JJ    =0;
2177   PetscScalar    *values=0;
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2182   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2183   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2184   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2185   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2186   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2187   m      = B->rmap->n/bs;
2188   rstart = B->rmap->rstart/bs;
2189   cstart = B->cmap->rstart/bs;
2190   cend   = B->cmap->rend/bs;
2191 
2192   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2193   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2194   for (i=0; i<m; i++) {
2195     nz = ii[i+1] - ii[i];
2196     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2197     nz_max = PetscMax(nz_max,nz);
2198     JJ     = jj + ii[i];
2199     for (j=0; j<nz; j++) {
2200       if (*JJ >= cstart) break;
2201       JJ++;
2202     }
2203     d = 0;
2204     for (; j<nz; j++) {
2205       if (*JJ++ >= cend) break;
2206       d++;
2207     }
2208     d_nnz[i] = d;
2209     o_nnz[i] = nz - d;
2210   }
2211   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2212   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2213 
2214   values = (PetscScalar*)V;
2215   if (!values) {
2216     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2217     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2218   }
2219   for (i=0; i<m; i++) {
2220     PetscInt          row    = i + rstart;
2221     PetscInt          ncols  = ii[i+1] - ii[i];
2222     const PetscInt    *icols = jj + ii[i];
2223     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2224     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2225   }
2226 
2227   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2228   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2229   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2230   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
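
/*
   Editor's sketch (not part of the original source): how a caller might feed a block-CSR
   pattern to the routine above through MatMPISBAIJSetPreallocationCSR(), the public entry
   point registered as "MatMPISBAIJSetPreallocationCSR_C" in MatCreate_MPISBAIJ() below.
   The two-block-row pattern (bs=1, run on a single process, upper triangle only) is an
   illustrative assumption.
.vb
   PetscInt    ii[] = {0,2,3};         // block row i owns jj[ii[i]..ii[i+1]-1]; ii[0] must be 0
   PetscInt    jj[] = {0,1,1};         // global block columns, upper triangular part only
   PetscScalar vv[] = {4.0,-1.0,4.0};  // optional values; pass NULL to preallocate without inserting
   ierr = MatMPISBAIJSetPreallocationCSR(B,1,ii,jj,vv);CHKERRQ(ierr);
.ve
*/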
2233 
2234 /*MC
2235    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2236    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2237    the matrix is stored.
2238 
2239   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2240   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE);
2241 
2242    Options Database Keys:
2243 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2244 
2245   Level: beginner
2246 
2247 .seealso: MatCreateMPISBAIJ
2248 M*/
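
/*
   Editor's sketch (not part of the original source): a typical MPISBAIJ setup, with the
   Hermitian option set only after assembly, as required by MatSetOption_MPISBAIJ() above.
   The global size (100x100, bs=2) and the preallocation estimates are illustrative assumptions.
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,2,3,NULL,1,NULL);CHKERRQ(ierr);
   ... insert the upper triangular blocks with MatSetValuesBlocked() ...
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
.ve
   Setting MAT_HERMITIAN also switches A->ops->mult to MatMult_MPISBAIJ_Hermitian (see above).
*/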
2249 
2250 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatCreate_MPISBAIJ"
2254 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2255 {
2256   Mat_MPISBAIJ   *b;
2257   PetscErrorCode ierr;
2258   PetscBool      flg = PETSC_FALSE;
2259 
2260   PetscFunctionBegin;
2261   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2262   B->data = (void*)b;
2263   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2264 
2265   B->ops->destroy = MatDestroy_MPISBAIJ;
2266   B->ops->view    = MatView_MPISBAIJ;
2267   B->assembled    = PETSC_FALSE;
2268   B->insertmode   = NOT_SET_VALUES;
2269 
2270   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2272 
2273   /* build local table of row and column ownerships */
2274   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2275 
2276   /* build cache for off array entries formed */
2277   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2278 
2279   b->donotstash  = PETSC_FALSE;
2280   b->colmap      = NULL;
2281   b->garray      = NULL;
2282   b->roworiented = PETSC_TRUE;
2283 
2284   /* stuff used in block assembly */
2285   b->barray = 0;
2286 
2287   /* stuff used for matrix vector multiply */
2288   b->lvec    = 0;
2289   b->Mvctx   = 0;
2290   b->slvec0  = 0;
2291   b->slvec0b = 0;
2292   b->slvec1  = 0;
2293   b->slvec1a = 0;
2294   b->slvec1b = 0;
2295   b->sMvctx  = 0;
2296 
2297   /* stuff for MatGetRow() */
2298   b->rowindices   = 0;
2299   b->rowvalues    = 0;
2300   b->getrowactive = PETSC_FALSE;
2301 
2302   /* hash table stuff */
2303   b->ht           = 0;
2304   b->hd           = 0;
2305   b->ht_size      = 0;
2306   b->ht_flag      = PETSC_FALSE;
2307   b->ht_fact      = 0;
2308   b->ht_total_ct  = 0;
2309   b->ht_insert_ct = 0;
2310 
2311   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2312   b->ijonly = PETSC_FALSE;
2313 
2314   b->in_loc = 0;
2315   b->v_loc  = 0;
2316   b->n_loc  = 0;
2317 
2318   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2319   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2320   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2321   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2322   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2323   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
2324 #if defined(PETSC_HAVE_ELEMENTAL)
2325   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2326 #endif
2327 
2328   B->symmetric                  = PETSC_TRUE;
2329   B->structurally_symmetric     = PETSC_TRUE;
2330   B->symmetric_set              = PETSC_TRUE;
2331   B->structurally_symmetric_set = PETSC_TRUE;
2332 
2333   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2334   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2335   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2336   if (flg) {
2337     PetscReal fact = 1.39;
2338     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2339     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2340     if (fact <= 1.0) fact = 1.39;
2341     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2342     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2343   }
2344   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 
2348 /*MC
2349    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2350 
2351    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2352    and MATMPISBAIJ otherwise.
2353 
2354    Options Database Keys:
2355 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2356 
2357   Level: beginner
2358 
2359 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2360 M*/
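
/*
   Editor's sketch (not part of the original source): requesting the generic type so the same
   code path yields MATSEQSBAIJ on one process and MATMPISBAIJ in parallel.  The 100x100 global
   size is an illustrative assumption.
.vb
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatSetUp(A);CHKERRQ(ierr);
.ve
*/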
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2364 /*@C
2365    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2366    the user should preallocate the matrix storage by setting the parameters
2367    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2368    performance can be increased by more than a factor of 50.
2369 
2370    Collective on Mat
2371 
2372    Input Parameters:
2373 +  B - the matrix
2374 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2375           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2376 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2377            submatrix  (same for all local rows)
2378 .  d_nnz - array containing the number of block nonzeros in the various block rows
2379            in the upper triangular and diagonal part of the in diagonal portion of the local
2380            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2381            for the diagonal entry and set a value even if it is zero.
2382 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2383            submatrix (same for all local rows).
2384 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2385            off-diagonal portion of the local submatrix that is right of the diagonal
2386            (possibly different for each block row) or NULL.
2387 
2388 
2389    Options Database Keys:
2390 .   -mat_no_unroll - uses code that does not unroll the loops in the
2391                      block calculations (much slower)
2392 .   -mat_block_size - size of the blocks to use
2393 
2394    Notes:
2395 
2396    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2397    then it must be used on all processors that share the object for that argument.
2398 
2399    If the *_nnz parameter is given then the *_nz parameter is ignored
2400 
2401    Storage Information:
2402    For a square global matrix we define each processor's diagonal portion
2403    to be its local rows and the corresponding columns (a square submatrix);
2404    each processor's off-diagonal portion encompasses the remainder of the
2405    local matrix (a rectangular submatrix).
2406 
2407    The user can specify preallocated storage for the diagonal part of
2408    the local submatrix with either d_nz or d_nnz (not both).  Set
2409    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2410    memory allocation.  Likewise, specify preallocated storage for the
2411    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2412 
2413    You can call MatGetInfo() to get information on how effective the preallocation was;
2414    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
2415    You can also run with the option -info and look for messages with the string
2416    malloc in them to see if additional memory allocation was needed.
2417 
2418    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2419    the figure below we depict these three local rows and all columns (0-11).
2420 
2421 .vb
2422            0 1 2 3 4 5 6 7 8 9 10 11
2423           --------------------------
2424    row 3  |. . . d d d o o o o  o  o
2425    row 4  |. . . d d d o o o o  o  o
2426    row 5  |. . . d d d o o o o  o  o
2427           --------------------------
2428 .ve
2429 
2430    Thus, any entries in the d locations are stored in the d (diagonal)
2431    submatrix, and any entries in the o locations are stored in the
2432    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2433    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2434 
2435    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2436    plus the diagonal part of the d matrix,
2437    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2438 
2439    In general, for PDE problems in which most nonzeros are near the diagonal,
2440    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2441    or you will get TERRIBLE performance; see the users' manual chapter on
2442    matrices.
2443 
2444    Level: intermediate
2445 
2446 .keywords: matrix, block, aij, compressed row, sparse, parallel
2447 
2448 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2449 @*/
2450 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2451 {
2452   PetscErrorCode ierr;
2453 
2454   PetscFunctionBegin;
2455   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2456   PetscValidType(B,1);
2457   PetscValidLogicalCollectiveInt(B,bs,2);
2458   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2459   PetscFunctionReturn(0);
2460 }
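
/*
   Editor's sketch (not part of the original source): preallocation on the process that owns
   rows 3, 4 and 5 of the 12x12, bs=1 matrix pictured above.  Only the upper triangle of the
   diagonal portion is counted, so d_nnz shrinks down the rows, while every row has six block
   columns to the right of the diagonal portion.
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3,2,1};
   PetscInt o_nnz[3] = {6,6,6};
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,12,12);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   Because d_nnz and o_nnz are supplied, the scalar d_nz and o_nz arguments (0 here) are ignored.
*/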
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatCreateSBAIJ"
2464 /*@C
2465    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2466    (block compressed row).  For good matrix assembly performance
2467    the user should preallocate the matrix storage by setting the parameters
2468    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2469    performance can be increased by more than a factor of 50.
2470 
2471    Collective on MPI_Comm
2472 
2473    Input Parameters:
2474 +  comm - MPI communicator
2475 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2476           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2477 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2478            This value should be the same as the local size used in creating the
2479            y vector for the matrix-vector product y = Ax.
2480 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2481            This value should be the same as the local size used in creating the
2482            x vector for the matrix-vector product y = Ax.
2483 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2484 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2485 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2486            submatrix  (same for all local rows)
2487 .  d_nnz - array containing the number of block nonzeros in the various block rows
2488            in the upper triangular portion of the diagonal portion of the local submatrix
2489            (possibly different for each block row) or NULL.
2490            If you plan to factor the matrix you must leave room for the diagonal entry and
2491            set its value even if it is zero.
2492 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2493            submatrix (same for all local rows).
2494 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2495            off-diagonal portion of the local submatrix (possibly different for
2496            each block row) or NULL.
2497 
2498    Output Parameter:
2499 .  A - the matrix
2500 
2501    Options Database Keys:
2502 .   -mat_no_unroll - uses code that does not unroll the loops in the
2503                      block calculations (much slower)
2504 .   -mat_block_size - size of the blocks to use
2505 .   -mat_mpi - use the parallel matrix data structures even on one processor
2506                (defaults to using SeqSBAIJ format on one processor)
2507 
2508    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2509    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2510    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2511 
2512    Notes:
2513    The number of rows and columns must be divisible by blocksize.
2514    This matrix type does not support complex Hermitian operations.
2515 
2516    The user MUST specify either the local or global matrix dimensions
2517    (possibly both).
2518 
2519    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2520    then it must be used on all processors that share the object for that argument.
2521 
2522    If the *_nnz parameter is given then the *_nz parameter is ignored
2523 
2524    Storage Information:
2525    For a square global matrix we define each processor's diagonal portion
2526    to be its local rows and the corresponding columns (a square submatrix);
2527    each processor's off-diagonal portion encompasses the remainder of the
2528    local matrix (a rectangular submatrix).
2529 
2530    The user can specify preallocated storage for the diagonal part of
2531    the local submatrix with either d_nz or d_nnz (not both).  Set
2532    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2533    memory allocation.  Likewise, specify preallocated storage for the
2534    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2535 
2536    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2537    the figure below we depict these three local rows and all columns (0-11).
2538 
2539 .vb
2540            0 1 2 3 4 5 6 7 8 9 10 11
2541           --------------------------
2542    row 3  |. . . d d d o o o o  o  o
2543    row 4  |. . . d d d o o o o  o  o
2544    row 5  |. . . d d d o o o o  o  o
2545           --------------------------
2546 .ve
2547 
2548    Thus, any entries in the d locations are stored in the d (diagonal)
2549    submatrix, and any entries in the o locations are stored in the
2550    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2551    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2552 
2553    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2554    plus the diagonal part of the d matrix,
2555    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2556    In general, for PDE problems in which most nonzeros are near the diagonal,
2557    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2558    or you will get TERRIBLE performance; see the users' manual chapter on
2559    matrices.
2560 
2561    Level: intermediate
2562 
2563 .keywords: matrix, block, aij, compressed row, sparse, parallel
2564 
2565 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2566 @*/
2567 
2568 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2569 {
2570   PetscErrorCode ierr;
2571   PetscMPIInt    size;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2575   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2576   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2577   if (size > 1) {
2578     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2579     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2580   } else {
2581     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2582     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2583   }
2584   PetscFunctionReturn(0);
2585 }
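
/*
   Editor's sketch (not part of the original source): the one-call equivalent of
   MatCreate()/MatSetType()/MatMPISBAIJSetPreallocation() for a 100x100 matrix with 2x2 blocks;
   the per-row estimates (5 diagonal-portion and 2 off-diagonal block nonzeros) are illustrative
   assumptions.
.vb
   Mat A;
   ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
   On a single process this creates a MATSEQSBAIJ matrix, in which case o_nz/o_nnz are ignored.
*/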
2586 
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2590 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2591 {
2592   Mat            mat;
2593   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2594   PetscErrorCode ierr;
2595   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2596   PetscScalar    *array;
2597 
2598   PetscFunctionBegin;
2599   *newmat = 0;
2600 
2601   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2602   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2603   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2604   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2605   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2606   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2607 
2608   mat->factortype   = matin->factortype;
2609   mat->preallocated = PETSC_TRUE;
2610   mat->assembled    = PETSC_TRUE;
2611   mat->insertmode   = NOT_SET_VALUES;
2612 
2613   a      = (Mat_MPISBAIJ*)mat->data;
2614   a->bs2 = oldmat->bs2;
2615   a->mbs = oldmat->mbs;
2616   a->nbs = oldmat->nbs;
2617   a->Mbs = oldmat->Mbs;
2618   a->Nbs = oldmat->Nbs;
2619 
2620 
2621   a->size         = oldmat->size;
2622   a->rank         = oldmat->rank;
2623   a->donotstash   = oldmat->donotstash;
2624   a->roworiented  = oldmat->roworiented;
2625   a->rowindices   = 0;
2626   a->rowvalues    = 0;
2627   a->getrowactive = PETSC_FALSE;
2628   a->barray       = 0;
2629   a->rstartbs     = oldmat->rstartbs;
2630   a->rendbs       = oldmat->rendbs;
2631   a->cstartbs     = oldmat->cstartbs;
2632   a->cendbs       = oldmat->cendbs;
2633 
2634   /* hash table stuff */
2635   a->ht           = 0;
2636   a->hd           = 0;
2637   a->ht_size      = 0;
2638   a->ht_flag      = oldmat->ht_flag;
2639   a->ht_fact      = oldmat->ht_fact;
2640   a->ht_total_ct  = 0;
2641   a->ht_insert_ct = 0;
2642 
2643   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2644   if (oldmat->colmap) {
2645 #if defined(PETSC_USE_CTABLE)
2646     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2647 #else
2648     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2649     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2650     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2651 #endif
2652   } else a->colmap = 0;
2653 
2654   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2655     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2656     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2657     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2658   } else a->garray = 0;
2659 
2660   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2661   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2662   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2663   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2664   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2665 
2666   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2667   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2668   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2669   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2670 
2671   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2672   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2673   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2674   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2675   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2676   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2677   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2678   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2679   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2680   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2681   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2682   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2683   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2684 
2685   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2686   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2687   a->sMvctx = oldmat->sMvctx;
2688   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2689 
2690   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2691   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2692   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2693   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2694   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2695   *newmat = mat;
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatLoad_MPISBAIJ"
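/*
   Loads an MPISBAIJ matrix from a PETSc binary viewer.  Process 0 reads the header, the
   row lengths, the column indices and the numerical values, and ships each other process
   its portion; when the global size is not divisible by the block size, extra rows with a
   unit diagonal are appended.  The column indices are scanned once to build the
   diagonal/off-diagonal block preallocation before the values are inserted.
*/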
2701 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2702 {
2703   PetscErrorCode ierr;
2704   PetscInt       i,nz,j,rstart,rend;
2705   PetscScalar    *vals,*buf;
2706   MPI_Comm       comm;
2707   MPI_Status     status;
2708   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2709   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2710   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2711   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2712   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2713   PetscInt       dcount,kmax,k,nzcount,tmp;
2714   int            fd;
2715 
2716   PetscFunctionBegin;
2717   /* force binary viewer to load .info file if it has not yet done so */
2718   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2719   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2720   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2721   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2722   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2723   if (bs < 0) bs = 1;
2724 
2725   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2726   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2727   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2728   if (!rank) {
2729     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2730     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2731     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2732   }
2733 
2734   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2735   M    = header[1];
2736   N    = header[2];
2737 
2738   /* If global sizes are set, check if they are consistent with that given in the file */
2739   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2740   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2741 
2742   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2743 
2744   /*
2745      This code adds extra rows to make sure the number of rows is
2746      divisible by the blocksize
2747   */
2748   Mbs        = M/bs;
2749   extra_rows = bs - M + bs*(Mbs);
2750   if (extra_rows == bs) extra_rows = 0;
2751   else                  Mbs++;
2752   if (extra_rows && !rank) {
2753     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2754   }
2755 
2756   /* determine ownership of all rows */
2757   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
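    /* split the Mbs block rows evenly: each of the first (Mbs % size) ranks gets one extra block row */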
2758     mbs = Mbs/size + ((Mbs % size) > rank);
2759     m   = mbs*bs;
2760   } else { /* User Set */
2761     m   = newmat->rmap->n;
2762     mbs = m/bs;
2763   }
2764   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2765   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2766   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2767   rowners[0] = 0;
2768   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2769   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2770   rstart = rowners[rank];
2771   rend   = rowners[rank+1];
2772 
2773   /* distribute row lengths to all processors */
2774   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2775   if (!rank) {
2776     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2777     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2778     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2779     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2780     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2781     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2782     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2783   } else {
2784     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2785   }
2786 
2787   if (!rank) {   /* procs[0] */
2788     /* calculate the number of nonzeros on each processor */
2789     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2790     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2791     for (i=0; i<size; i++) {
2792       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2793         procsnz[i] += rowlengths[j];
2794       }
2795     }
2796     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2797 
2798     /* determine max buffer needed and allocate it */
2799     maxnz = 0;
2800     for (i=0; i<size; i++) {
2801       maxnz = PetscMax(maxnz,procsnz[i]);
2802     }
2803     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2804 
2805     /* read in my part of the matrix column indices  */
2806     nz     = procsnz[0];
2807     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2808     mycols = ibuf;
2809     if (size == 1) nz -= extra_rows;
2810     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2811     if (size == 1) {
2812       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2813     }
2814 
2815     /* read in the parts for the other processes (except the last) and ship them off */
2816     for (i=1; i<size-1; i++) {
2817       nz   = procsnz[i];
2818       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2819       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2820     }
2821     /* read in the part for the last process */
2822     if (size != 1) {
2823       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2824       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2825       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2826       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2827     }
2828     ierr = PetscFree(cols);CHKERRQ(ierr);
2829   } else {  /* procs[i], i>0 */
2830     /* determine buffer space needed for message */
2831     nz = 0;
2832     for (i=0; i<m; i++) nz += locrowlens[i];
2833     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2834     mycols = ibuf;
2835     /* receive message of column indices */
2836     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2837     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2838     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2839   }
2840 
2841   /* loop over local rows, determining number of off diagonal entries */
2842   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2843   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2844   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2845   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2846   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2847   rowcount = 0;
2848   nzcount  = 0;
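  /* mask[] marks the block columns already seen in the current block row; masked1[] and
     masked2[] record which marks to clear and, via dcount/odcount, give the number of
     diagonal-block and off-diagonal-block entries for that block row */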
2849   for (i=0; i<mbs; i++) {
2850     dcount  = 0;
2851     odcount = 0;
2852     for (j=0; j<bs; j++) {
2853       kmax = locrowlens[rowcount];
2854       for (k=0; k<kmax; k++) {
2855         tmp = mycols[nzcount++]/bs; /* block col. index */
2856         if (!mask[tmp]) {
2857           mask[tmp] = 1;
2858           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2859           else masked1[dcount++] = tmp; /* entry in diag portion */
2860         }
2861       }
2862       rowcount++;
2863     }
2864 
2865     dlens[i]  = dcount;  /* d_nzz[i] */
2866     odlens[i] = odcount; /* o_nzz[i] */
2867 
2868     /* zero out the mask elements we set */
2869     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2870     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2871   }
2872   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2873   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2874   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2875 
2876   if (!rank) {
2877     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2878     /* read in my part of the matrix numerical values  */
2879     nz     = procsnz[0];
2880     vals   = buf;
2881     mycols = ibuf;
2882     if (size == 1) nz -= extra_rows;
2883     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2884     if (size == 1) {
2885       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2886     }
2887 
2888     /* insert into matrix */
2889     jj = rstart*bs;
2890     for (i=0; i<m; i++) {
2891       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2892       mycols += locrowlens[i];
2893       vals   += locrowlens[i];
2894       jj++;
2895     }
2896 
2897     /* read in the parts for the other processes (except the last one) and ship them out */
2898     for (i=1; i<size-1; i++) {
2899       nz   = procsnz[i];
2900       vals = buf;
2901       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2902       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2903     }
2904     /* the last process */
2905     if (size != 1) {
2906       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2907       vals = buf;
2908       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2909       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2910       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2911     }
2912     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2913 
2914   } else {
2915     /* receive numeric values */
2916     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2917 
2918     /* receive message of values */
2919     vals   = buf;
2920     mycols = ibuf;
2921     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2922     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2923     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2924 
2925     /* insert into matrix */
2926     jj = rstart*bs;
2927     for (i=0; i<m; i++) {
2928       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2929       mycols += locrowlens[i];
2930       vals   += locrowlens[i];
2931       jj++;
2932     }
2933   }
2934 
2935   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2936   ierr = PetscFree(buf);CHKERRQ(ierr);
2937   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2938   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2939   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2940   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2941   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2942   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   PetscFunctionReturn(0);
2944 }
2945 
2946 #undef __FUNCT__
2947 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2948 /*XXXXX@
2949    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2950 
2951    Input Parameters:
2952 +  mat  - the matrix
2953 -  fact - factor
2954 
2955    Not Collective on Mat, each process can have a different hash factor
2956 
2957    Level: advanced
2958 
2959   Notes:
2960    This can also be set by the command line option: -mat_use_hash_table fact
2961 
2962 .keywords: matrix, hashtable, factor, HT
2963 
2964 .seealso: MatSetOption()
2965 @XXXXX*/
2966 
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
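/*
   Row-wise maximum absolute value for an MPISBAIJ matrix.  The diagonal block A is handled
   by MatGetRowMaxAbs().  Because only the upper triangle is stored, each entry of the
   off-diagonal block B also belongs (by symmetry) to a row owned by a later process; those
   contributions are accumulated in work[] and sent to their owners, which fold the received
   values into their local result.
*/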
2970 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2971 {
2972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2973   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2974   PetscReal      atmp;
2975   PetscReal      *work,*svalues,*rvalues;
2976   PetscErrorCode ierr;
2977   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2978   PetscMPIInt    rank,size;
2979   PetscInt       *rowners_bs,dest,count,source;
2980   PetscScalar    *va;
2981   MatScalar      *ba;
2982   MPI_Status     stat;
2983 
2984   PetscFunctionBegin;
2985   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2986   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2987   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2988 
2989   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2990   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2991 
2992   bs  = A->rmap->bs;
2993   mbs = a->mbs;
2994   Mbs = a->Mbs;
2995   ba  = b->a;
2996   bi  = b->i;
2997   bj  = b->j;
2998 
2999   /* find ownerships */
3000   rowners_bs = A->rmap->range;
3001 
3002   /* each proc creates an array to be distributed */
3003   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3004   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
3005 
3006   /* row_max for B */
3007   if (rank != size-1) {
3008     for (i=0; i<mbs; i++) {
3009       ncols = bi[1] - bi[0]; bi++;
3010       brow  = bs*i;
3011       for (j=0; j<ncols; j++) {
3012         bcol = bs*(*bj);
3013         for (kcol=0; kcol<bs; kcol++) {
3014           col  = bcol + kcol;                /* local col index */
3015           col += rowners_bs[rank+1];      /* global col index */
3016           for (krow=0; krow<bs; krow++) {
3017             atmp = PetscAbsScalar(*ba); ba++;
3018             row  = brow + krow;   /* local row index */
3019             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3020             if (work[col] < atmp) work[col] = atmp;
3021           }
3022         }
3023         bj++;
3024       }
3025     }
3026 
3027     /* send values to its owners */
3028     for (dest=rank+1; dest<size; dest++) {
3029       svalues = work + rowners_bs[dest];
3030       count   = rowners_bs[dest+1]-rowners_bs[dest];
3031       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3032     }
3033   }
3034 
3035   /* receive values */
3036   if (rank) {
3037     rvalues = work;
3038     count   = rowners_bs[rank+1]-rowners_bs[rank];
3039     for (source=0; source<rank; source++) {
3040       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3041       /* process values */
3042       for (i=0; i<count; i++) {
3043         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3044       }
3045     }
3046   }
3047 
3048   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3049   ierr = PetscFree(work);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 #undef __FUNCT__
3054 #define __FUNCT__ "MatSOR_MPISBAIJ"
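/*
   SOR/SSOR for MPISBAIJ (block size 1 only).  For local symmetric sweeps, each outer
   iteration forms bb1 = bb minus the coupling with off-process unknowns: the lower-triangular
   coupling is applied as B^T*x and the upper-triangular coupling as B times the gathered
   remote x, both communicated through the merged slvec0/slvec1 scatter; a symmetric sweep
   with the diagonal block A is then applied locally.  Single local forward/backward sweeps
   with a zero initial guess and the Eisenstat trick are handled separately below.
*/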
3055 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3056 {
3057   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3058   PetscErrorCode    ierr;
3059   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3060   PetscScalar       *x,*ptr,*from;
3061   Vec               bb1;
3062   const PetscScalar *b;
3063 
3064   PetscFunctionBegin;
3065   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3066   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3067 
3068   if (flag == SOR_APPLY_UPPER) {
3069     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3070     PetscFunctionReturn(0);
3071   }
3072 
3073   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3074     if (flag & SOR_ZERO_INITIAL_GUESS) {
3075       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3076       its--;
3077     }
3078 
3079     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3080     while (its--) {
3081 
3082       /* lower triangular part: slvec0b = - B^T*xx */
3083       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3084 
3085       /* copy xx into slvec0a */
3086       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3087       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3088       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3089       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3090 
3091       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3092 
3093       /* copy bb into slvec1a */
3094       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3095       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3096       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3097       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3098 
3099       /* set slvec1b = 0 */
3100       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3101 
3102       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3103       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3104       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3105       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3106 
3107       /* upper triangular part: bb1 = bb1 - B*x */
3108       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3109 
3110       /* local diagonal sweep */
3111       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3112     }
3113     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3114   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3115     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3116   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3117     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3118   } else if (flag & SOR_EISENSTAT) {
3119     Vec               xx1;
3120     PetscBool         hasop;
3121     const PetscScalar *diag;
3122     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3123     PetscInt          i,n;
3124 
3125     if (!mat->xx1) {
3126       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3127       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3128     }
3129     xx1 = mat->xx1;
3130     bb1 = mat->bb1;
3131 
3132     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3133 
3134     if (!mat->diag) {
3135       /* this is wrong for same matrix with new nonzero values */
3136       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3137       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3138     }
3139     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3140 
3141     if (hasop) {
3142       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3143       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3144     } else {
3145       /*
3146           These two lines are replaced by code that may be a bit faster for a good compiler
3147       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3148       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3149       */
3150       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3151       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3152       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3153       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3154       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3155       if (omega == 1.0) {
3156         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3157         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3158       } else {
3159         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3160         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3161       }
3162       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3163       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3164       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3165       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3166     }
3167 
3168     /* multiply off-diagonal portion of matrix */
3169     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3170     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3171     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3172     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3173     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3174     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3175     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3176     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3177     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3178     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3179 
3180     /* local sweep */
3181     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3182     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3183   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3184   PetscFunctionReturn(0);
3185 }
3186 
3187 #undef __FUNCT__
3188 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
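/*
   Variant of MatSOR_MPISBAIJ that uses the general Mvctx scatter twice per iteration
   (forward for the upper-triangular coupling, reverse for the lower-triangular coupling
   applied as B^T*x) instead of the merged slvec scatter.
*/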
3189 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3190 {
3191   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3192   PetscErrorCode ierr;
3193   Vec            lvec1,bb1;
3194 
3195   PetscFunctionBegin;
3196   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3197   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3198 
3199   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3200     if (flag & SOR_ZERO_INITIAL_GUESS) {
3201       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3202       its--;
3203     }
3204 
3205     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3206     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3207     while (its--) {
3208       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3209 
3210       /* lower diagonal part: bb1 = bb - B^T*xx */
3211       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3212       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3213 
3214       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3215       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3216       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3217 
3218       /* upper diagonal part: bb1 = bb1 - B*x */
3219       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3220       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3221 
3222       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3223 
3224       /* diagonal sweep */
3225       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3226     }
3227     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3228     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3229   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3235 /*@
3236      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
3237          the local rows in standard CSR format.
3238 
3239    Collective on MPI_Comm
3240 
3241    Input Parameters:
3242 +  comm - MPI communicator
3243 .  bs - the block size, only a block size of 1 is supported
3244 .  m - number of local rows (Cannot be PETSC_DECIDE)
3245 .  n - This value should be the same as the local size used in creating the
3246        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3247        calculated if N is given). For square matrices n is almost always m.
3248 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3249 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3250 .   i - row indices
3251 .   j - column indices
3252 -   a - matrix values
3253 
3254    Output Parameter:
3255 .   mat - the matrix
3256 
3257    Level: intermediate
3258 
3259    Notes:
3260        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3261      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3262      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3263 
3264        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3265 
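   As a sketch (illustrative values only, and assuming each local row supplies its diagonal
   and upper-triangular entries), the rank owning rows 0 and 1 of a symmetric tridiagonal
   4x4 matrix could call

.vb
   Mat         A;
   PetscInt    i[] = {0,2,4};        /* illustrative: rows 0 and 1 each have 2 entries */
   PetscInt    j[] = {0,1, 1,2};     /* column indices, sorted within each row */
   PetscScalar a[] = {2,-1, 2,-1};
   ierr = MatCreateMPISBAIJWithArrays(comm,1,2,2,4,4,i,j,a,&A);CHKERRQ(ierr);
.ve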
3266 .keywords: matrix, aij, compressed row, sparse, parallel
3267 
3268 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3269           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3270 @*/
3271 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3272 {
3273   PetscErrorCode ierr;
3274 
3275 
3276   PetscFunctionBegin;
3277   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3278   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3279   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3280   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3281   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3282   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3283   PetscFunctionReturn(0);
3284 }
3285 
3286 
3287 #undef __FUNCT__
3288 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3289 /*@C
3290    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
3291    (block compressed row storage of the upper triangular portion of the symmetric matrix).
3292 
3293    Collective on MPI_Comm
3294 
3295    Input Parameters:
3296 +  B - the matrix
3297 .  bs - the block size
3298 .  i - the indices into j for the start of each local row (starts with zero)
3299 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
3300 -  v - optional values in the matrix
3301 
3302    Level: developer
3303 
3304 .keywords: matrix, aij, compressed row, sparse, parallel
3305 
3306 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3307 @*/
3308 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3309 {
3310   PetscErrorCode ierr;
3311 
3312   PetscFunctionBegin;
3313   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 
3317 #undef __FUNCT__
3318 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
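/*
   Stacks the sequential SBAIJ matrix supplied by each process into one parallel SBAIJ matrix.
   The symbolic phase (MAT_INITIAL_MATRIX) determines block row ownership and computes the
   diagonal/off-diagonal preallocation from the block column indices of each row; the numeric
   phase copies the upper-triangular rows with MatSetValues().
*/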
3319 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3320 {
3321   PetscErrorCode ierr;
3322   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3323   PetscInt       *indx;
3324   PetscScalar    *values;
3325 
3326   PetscFunctionBegin;
3327   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3328   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3329     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3330     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3331     PetscInt       *bindx,rmax=a->rmax,j;
3332 
3333     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3334     mbs = m/bs; Nbs = N/cbs;
3335     if (n == PETSC_DECIDE) {
3336       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3337     }
3338     /* Check sum(n) = Nbs */
3339     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3340     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns != global block columns %D",Nbs);
3341 
3342     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3343     rstart -= mbs;
3344 
3345     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3346     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3347     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3348     for (i=0; i<mbs; i++) {
3349       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3350       nnz = nnz/bs;
3351       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3352       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3353       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3354     }
3355     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3356     ierr = PetscFree(bindx);CHKERRQ(ierr);
3357 
3358     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3359     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3360     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3361     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3362     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3363     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3364   }
3365 
3366   /* numeric phase */
3367   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3368   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3369 
3370   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3371   for (i=0; i<m; i++) {
3372     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3373     Ii   = i + rstart;
3374     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3375     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3376   }
3377   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3378   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3379   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3380   PetscFunctionReturn(0);
3381 }
3382