xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cf37664f2dbf3effe674ec5ac2718028c1621ae0)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,orow,ocol)      \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,orow,ocol) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j input by user is ingored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some Variables required in the macro */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_SeqSBAIJ_Inlined"
240 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
241 {
242   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ*)A->data;
243   PetscErrorCode    ierr;
244   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
245   PetscInt          *imax      =a->imax,*ai=a->i,*ailen=a->ilen;
246   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
247   PetscBool         roworiented=a->roworiented;
248   const PetscScalar *value     = v;
249   MatScalar         *ap,*aa = a->a,*bap;
250 
251   PetscFunctionBegin;
252   if (col < row) {
253     if (a->ignore_ltriangular) PetscFunctionReturn(0); /* ignore lower triangular block */
254     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
255   }
256   rp   = aj + ai[row];
257   ap   = aa + bs2*ai[row];
258   rmax = imax[row];
259   nrow = ailen[row];
260   value = v;
261   low   = 0;
262   high  = nrow;
263 
264   while (high-low > 7) {
265     t = (low+high)/2;
266     if (rp[t] > col) high = t;
267     else             low  = t;
268   }
269   for (i=low; i<high; i++) {
270     if (rp[i] > col) break;
271     if (rp[i] == col) {
272       bap = ap +  bs2*i;
273       if (roworiented) {
274         if (is == ADD_VALUES) {
275           for (ii=0; ii<bs; ii++) {
276             for (jj=ii; jj<bs2; jj+=bs) {
277               bap[jj] += *value++;
278             }
279           }
280         } else {
281           for (ii=0; ii<bs; ii++) {
282             for (jj=ii; jj<bs2; jj+=bs) {
283               bap[jj] = *value++;
284             }
285           }
286         }
287       } else {
288         if (is == ADD_VALUES) {
289           for (ii=0; ii<bs; ii++) {
290             for (jj=0; jj<bs; jj++) {
291               *bap++ += *value++;
292             }
293           }
294         } else {
295           for (ii=0; ii<bs; ii++) {
296             for (jj=0; jj<bs; jj++) {
297               *bap++  = *value++;
298             }
299           }
300         }
301       }
302       goto noinsert2;
303     }
304   }
305   if (nonew == 1) goto noinsert2;
306   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new block index nonzero block (%D, %D) in the matrix", orow, ocol);
307   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
308   N = nrow++ - 1; high++;
309   /* shift up all the later entries in this row */
310   for (ii=N; ii>=i; ii--) {
311     rp[ii+1] = rp[ii];
312     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
313   }
314   if (N >= i) {
315     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
316   }
317   rp[i] = col;
318   bap   = ap +  bs2*i;
319   if (roworiented) {
320     for (ii=0; ii<bs; ii++) {
321       for (jj=ii; jj<bs2; jj+=bs) {
322         bap[jj] = *value++;
323       }
324     }
325   } else {
326     for (ii=0; ii<bs; ii++) {
327       for (jj=0; jj<bs; jj++) {
328         *bap++ = *value++;
329       }
330     }
331   }
332   noinsert2:;
333   ailen[row] = nrow;
334   PetscFunctionReturn(0);
335 }
336 
337 #undef __FUNCT__
338 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ_Inlined"
339 /*
340    This routine is exactly duplicated in mpibaij.c
341 */
342 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
343 {
344   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
345   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
346   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
347   PetscErrorCode    ierr;
348   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
349   PetscBool         roworiented=a->roworiented;
350   const PetscScalar *value     = v;
351   MatScalar         *ap,*aa = a->a,*bap;
352 
353   PetscFunctionBegin;
354   rp   = aj + ai[row];
355   ap   = aa + bs2*ai[row];
356   rmax = imax[row];
357   nrow = ailen[row];
358   low  = 0;
359   high = nrow;
360   value = v;
361   while (high-low > 7) {
362     t = (low+high)/2;
363     if (rp[t] > col) high = t;
364     else             low  = t;
365   }
366   for (i=low; i<high; i++) {
367     if (rp[i] > col) break;
368     if (rp[i] == col) {
369       bap = ap +  bs2*i;
370       if (roworiented) {
371         if (is == ADD_VALUES) {
372           for (ii=0; ii<bs; ii++) {
373             for (jj=ii; jj<bs2; jj+=bs) {
374               bap[jj] += *value++;
375             }
376           }
377         } else {
378           for (ii=0; ii<bs; ii++) {
379             for (jj=ii; jj<bs2; jj+=bs) {
380               bap[jj] = *value++;
381             }
382           }
383         }
384       } else {
385         if (is == ADD_VALUES) {
386           for (ii=0; ii<bs; ii++,value+=bs) {
387             for (jj=0; jj<bs; jj++) {
388               bap[jj] += value[jj];
389             }
390             bap += bs;
391           }
392         } else {
393           for (ii=0; ii<bs; ii++,value+=bs) {
394             for (jj=0; jj<bs; jj++) {
395               bap[jj]  = value[jj];
396             }
397             bap += bs;
398           }
399         }
400       }
401       goto noinsert2;
402     }
403   }
404   if (nonew == 1) goto noinsert2;
405   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
406   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
407   N = nrow++ - 1; high++;
408   /* shift up all the later entries in this row */
409   for (ii=N; ii>=i; ii--) {
410     rp[ii+1] = rp[ii];
411     ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
412   }
413   if (N >= i) {
414     ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
415   }
416   rp[i] = col;
417   bap   = ap +  bs2*i;
418   if (roworiented) {
419     for (ii=0; ii<bs; ii++) {
420       for (jj=ii; jj<bs2; jj+=bs) {
421         bap[jj] = *value++;
422       }
423     }
424   } else {
425     for (ii=0; ii<bs; ii++) {
426       for (jj=0; jj<bs; jj++) {
427         *bap++ = *value++;
428       }
429     }
430   }
431   noinsert2:;
432   ailen[row] = nrow;
433   PetscFunctionReturn(0);
434 }
435 
436 #undef __FUNCT__
437 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
438 /*
439     This routine could be optimized by removing the need for the block copy below and passing stride information
440   to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
441 */
442 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
443 {
444   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
445   const MatScalar *value;
446   MatScalar       *barray     =baij->barray;
447   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
448   PetscErrorCode  ierr;
449   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
450   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
451   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
452 
453   PetscFunctionBegin;
454   if (!barray) {
455     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
456     baij->barray = barray;
457   }
458 
459   if (roworiented) {
460     stepval = (n-1)*bs;
461   } else {
462     stepval = (m-1)*bs;
463   }
464   for (i=0; i<m; i++) {
465     if (im[i] < 0) continue;
466 #if defined(PETSC_USE_DEBUG)
467     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
468 #endif
469     if (im[i] >= rstart && im[i] < rend) {
470       row = im[i] - rstart;
471       for (j=0; j<n; j++) {
472         if (im[i] > in[j]) {
473           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
474           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
475         }
476         /* If NumCol = 1 then a copy is not required */
477         if ((roworiented) && (n == 1)) {
478           barray = (MatScalar*) v + i*bs2;
479         } else if ((!roworiented) && (m == 1)) {
480           barray = (MatScalar*) v + j*bs2;
481         } else { /* Here a copy is required */
482           if (roworiented) {
483             value = v + i*(stepval+bs)*bs + j*bs;
484           } else {
485             value = v + j*(stepval+bs)*bs + i*bs;
486           }
487           for (ii=0; ii<bs; ii++,value+=stepval) {
488             for (jj=0; jj<bs; jj++) {
489               *barray++ = *value++;
490             }
491           }
492           barray -=bs2;
493         }
494 
495         if (in[j] >= cstart && in[j] < cend) {
496           col  = in[j] - cstart;
497           ierr = MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
498         } else if (in[j] < 0) continue;
499 #if defined(PETSC_USE_DEBUG)
500         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
501 #endif
502         else {
503           if (mat->was_assembled) {
504             if (!baij->colmap) {
505               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
506             }
507 
508 #if defined(PETSC_USE_DEBUG)
509 #if defined(PETSC_USE_CTABLE)
510             { PetscInt data;
511               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
512               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
513             }
514 #else
515             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
516 #endif
517 #endif
518 #if defined(PETSC_USE_CTABLE)
519             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
520             col  = (col - 1)/bs;
521 #else
522             col = (baij->colmap[in[j]] - 1)/bs;
523 #endif
524             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
525               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
526               col  = in[j];
527             }
528           } else col = in[j];
529           ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
530         }
531       }
532     } else {
533       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
534       if (!baij->donotstash) {
535         if (roworiented) {
536           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
537         } else {
538           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
539         }
540       }
541     }
542   }
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatGetValues_MPISBAIJ"
548 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
549 {
550   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
551   PetscErrorCode ierr;
552   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
553   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
554 
555   PetscFunctionBegin;
556   for (i=0; i<m; i++) {
557     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
558     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
559     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
560       row = idxm[i] - bsrstart;
561       for (j=0; j<n; j++) {
562         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
563         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
564         if (idxn[j] >= bscstart && idxn[j] < bscend) {
565           col  = idxn[j] - bscstart;
566           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
567         } else {
568           if (!baij->colmap) {
569             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
570           }
571 #if defined(PETSC_USE_CTABLE)
572           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
573           data--;
574 #else
575           data = baij->colmap[idxn[j]/bs]-1;
576 #endif
577           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
578           else {
579             col  = data + idxn[j]%bs;
580             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
581           }
582         }
583       }
584     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatNorm_MPISBAIJ"
591 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
592 {
593   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
594   PetscErrorCode ierr;
595   PetscReal      sum[2],*lnorm2;
596 
597   PetscFunctionBegin;
598   if (baij->size == 1) {
599     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
600   } else {
601     if (type == NORM_FROBENIUS) {
602       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
603       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
605       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
606       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
607       ierr    = MPIU_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
608       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
609       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
610     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
611       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
612       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
613       PetscReal    *rsum,*rsum2,vabs;
614       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
615       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
616       MatScalar    *v;
617 
618       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
619       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
620       /* Amat */
621       v = amat->a; jj = amat->j;
622       for (brow=0; brow<mbs; brow++) {
623         grow = bs*(rstart + brow);
624         nz   = amat->i[brow+1] - amat->i[brow];
625         for (bcol=0; bcol<nz; bcol++) {
626           gcol = bs*(rstart + *jj); jj++;
627           for (col=0; col<bs; col++) {
628             for (row=0; row<bs; row++) {
629               vabs            = PetscAbsScalar(*v); v++;
630               rsum[gcol+col] += vabs;
631               /* non-diagonal block */
632               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
633             }
634           }
635         }
636         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
637       }
638       /* Bmat */
639       v = bmat->a; jj = bmat->j;
640       for (brow=0; brow<mbs; brow++) {
641         grow = bs*(rstart + brow);
642         nz = bmat->i[brow+1] - bmat->i[brow];
643         for (bcol=0; bcol<nz; bcol++) {
644           gcol = bs*garray[*jj]; jj++;
645           for (col=0; col<bs; col++) {
646             for (row=0; row<bs; row++) {
647               vabs            = PetscAbsScalar(*v); v++;
648               rsum[gcol+col] += vabs;
649               rsum[grow+row] += vabs;
650             }
651           }
652         }
653         ierr = PetscLogFlops(nz*bs*bs);CHKERRQ(ierr);
654       }
655       ierr  = MPIU_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
656       *norm = 0.0;
657       for (col=0; col<mat->cmap->N; col++) {
658         if (rsum2[col] > *norm) *norm = rsum2[col];
659       }
660       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
661     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
662   }
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
668 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode ierr;
672   PetscInt       nstash,reallocs;
673 
674   PetscFunctionBegin;
675   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
676 
677   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
678   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
679   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
680   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
681   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
682   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
688 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
689 {
690   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
691   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
692   PetscErrorCode ierr;
693   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
694   PetscInt       *row,*col;
695   PetscBool      other_disassembled;
696   PetscMPIInt    n;
697   PetscBool      r1,r2,r3;
698   MatScalar      *val;
699 
700   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
701   PetscFunctionBegin;
702   if (!baij->donotstash &&  !mat->nooffprocentries) {
703     while (1) {
704       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
705       if (!flg) break;
706 
707       for (i=0; i<n;) {
708         /* Now identify the consecutive vals belonging to the same row */
709         for (j=i,rstart=row[j]; j<n; j++) {
710           if (row[j] != rstart) break;
711         }
712         if (j < n) ncols = j-i;
713         else       ncols = n-i;
714         /* Now assemble all these values with a single function call */
715         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
716         i    = j;
717       }
718     }
719     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
720     /* Now process the block-stash. Since the values are stashed column-oriented,
721        set the roworiented flag to column oriented, and after MatSetValues()
722        restore the original flags */
723     r1 = baij->roworiented;
724     r2 = a->roworiented;
725     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
726 
727     baij->roworiented = PETSC_FALSE;
728     a->roworiented    = PETSC_FALSE;
729 
730     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
731     while (1) {
732       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
733       if (!flg) break;
734 
735       for (i=0; i<n;) {
736         /* Now identify the consecutive vals belonging to the same row */
737         for (j=i,rstart=row[j]; j<n; j++) {
738           if (row[j] != rstart) break;
739         }
740         if (j < n) ncols = j-i;
741         else       ncols = n-i;
742         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
743         i    = j;
744       }
745     }
746     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
747 
748     baij->roworiented = r1;
749     a->roworiented    = r2;
750 
751     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
752   }
753 
754   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
755   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
756 
757   /* determine if any processor has disassembled, if so we must
758      also disassemble ourselfs, in order that we may reassemble. */
759   /*
760      if nonzero structure of submatrix B cannot change then we know that
761      no processor disassembled thus we can skip this stuff
762   */
763   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
764     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
765     if (mat->was_assembled && !other_disassembled) {
766       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
767     }
768   }
769 
770   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
771     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
772   }
773   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
774   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
775 
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777 
778   baij->rowvalues = 0;
779 
780   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
781   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
782     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
783     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 extern PetscErrorCode MatView_SeqSBAIJ(Mat,PetscViewer);
789 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
790 #include <petscdraw.h>
791 #undef __FUNCT__
792 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
793 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
794 {
795   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
796   PetscErrorCode    ierr;
797   PetscInt          bs   = mat->rmap->bs;
798   PetscMPIInt       rank = baij->rank;
799   PetscBool         iascii,isdraw;
800   PetscViewer       sviewer;
801   PetscViewerFormat format;
802 
803   PetscFunctionBegin;
804   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
805   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
806   if (iascii) {
807     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
808     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
809       MatInfo info;
810       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
811       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
812       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
814       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
815       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
816       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
817       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
818       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
819       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
820       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
821       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
822       PetscFunctionReturn(0);
823     } else if (format == PETSC_VIEWER_ASCII_INFO) {
824       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
825       PetscFunctionReturn(0);
826     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
827       PetscFunctionReturn(0);
828     }
829   }
830 
831   if (isdraw) {
832     PetscDraw draw;
833     PetscBool isnull;
834     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
835     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
836     if (isnull) PetscFunctionReturn(0);
837   }
838 
839   {
840     /* assemble the entire matrix onto first processor. */
841     Mat          A;
842     Mat_SeqSBAIJ *Aloc;
843     Mat_SeqBAIJ  *Bloc;
844     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
845     MatScalar    *a;
846     const char   *matname;
847 
848     /* Should this be the same type as mat? */
849     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
850     if (!rank) {
851       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
852     } else {
853       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
854     }
855     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
856     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
857     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
858     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
859 
860     /* copy over the A part */
861     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
862     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
863     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
864 
865     for (i=0; i<mbs; i++) {
866       rvals[0] = bs*(baij->rstartbs + i);
867       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
868       for (j=ai[i]; j<ai[i+1]; j++) {
869         col = (baij->cstartbs+aj[j])*bs;
870         for (k=0; k<bs; k++) {
871           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
872           col++;
873           a += bs;
874         }
875       }
876     }
877     /* copy over the B part */
878     Bloc = (Mat_SeqBAIJ*)baij->B->data;
879     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
880     for (i=0; i<mbs; i++) {
881 
882       rvals[0] = bs*(baij->rstartbs + i);
883       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
884       for (j=ai[i]; j<ai[i+1]; j++) {
885         col = baij->garray[aj[j]]*bs;
886         for (k=0; k<bs; k++) {
887           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
888           col++;
889           a += bs;
890         }
891       }
892     }
893     ierr = PetscFree(rvals);CHKERRQ(ierr);
894     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
895     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
896     /*
897        Everyone has to call to draw the matrix since the graphics waits are
898        synchronized across all processors that share the PetscDraw object
899     */
900     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
901     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
902     if (!rank) {
903       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
904       ierr = MatView_SeqSBAIJ(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
905     }
906     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
907     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
908     ierr = MatDestroy(&A);CHKERRQ(ierr);
909   }
910   PetscFunctionReturn(0);
911 }
912 
913 #undef __FUNCT__
914 #define __FUNCT__ "MatView_MPISBAIJ_Binary"
915 static PetscErrorCode MatView_MPISBAIJ_Binary(Mat mat,PetscViewer viewer)
916 {
917   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)mat->data;
918   Mat_SeqSBAIJ   *A = (Mat_SeqSBAIJ*)a->A->data;
919   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
920   PetscErrorCode ierr;
921   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
922   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
923   int            fd;
924   PetscScalar    *column_values;
925   FILE           *file;
926   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
927   PetscInt       message_count,flowcontrolcount;
928 
929   PetscFunctionBegin;
930   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
931   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
932   nz   = bs2*(A->nz + B->nz);
933   rlen = mat->rmap->n;
934   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
935   if (!rank) {
936     header[0] = MAT_FILE_CLASSID;
937     header[1] = mat->rmap->N;
938     header[2] = mat->cmap->N;
939 
940     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
941     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
942     /* get largest number of rows any processor has */
943     range = mat->rmap->range;
944     for (i=1; i<size; i++) {
945       rlen = PetscMax(rlen,range[i+1] - range[i]);
946     }
947   } else {
948     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
949   }
950 
951   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
952   /* compute lengths of each row  */
953   for (i=0; i<a->mbs; i++) {
954     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
955   }
956   /* store the row lengths to the file */
957   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
958   if (!rank) {
959     MPI_Status status;
960     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
961     rlen = (range[1] - range[0])/bs;
962     for (i=0; i<rlen; i++) {
963       for (j=0; j<bs; j++) {
964         row_lens[i*bs+j] = bs*crow_lens[i];
965       }
966     }
967     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
968     for (i=1; i<size; i++) {
969       rlen = (range[i+1] - range[i])/bs;
970       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
971       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
972       for (k=0; k<rlen; k++) {
973         for (j=0; j<bs; j++) {
974           row_lens[k*bs+j] = bs*crow_lens[k];
975         }
976       }
977       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
978     }
979     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
980     ierr = PetscFree(row_lens);CHKERRQ(ierr);
981   } else {
982     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
983     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
984     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
985   }
986   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
987 
988   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
989      information needed to make it for each row from a block row. This does require more communication but still not more than
990      the communication needed for the nonzero values  */
991   nzmax = nz; /*  space a largest processor needs */
992   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
993   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
994   cnt   = 0;
995   for (i=0; i<a->mbs; i++) {
996     pcnt = cnt;
997     for (j=B->i[i]; j<B->i[i+1]; j++) {
998       if ((col = garray[B->j[j]]) > cstart) break;
999       for (l=0; l<bs; l++) {
1000         column_indices[cnt++] = bs*col+l;
1001       }
1002     }
1003     for (k=A->i[i]; k<A->i[i+1]; k++) {
1004       for (l=0; l<bs; l++) {
1005         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1006       }
1007     }
1008     for (; j<B->i[i+1]; j++) {
1009       for (l=0; l<bs; l++) {
1010         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1011       }
1012     }
1013     len = cnt - pcnt;
1014     for (k=1; k<bs; k++) {
1015       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1016       cnt += len;
1017     }
1018   }
1019   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1020 
1021   /* store the columns to the file */
1022   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1023   if (!rank) {
1024     MPI_Status status;
1025     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1026     for (i=1; i<size; i++) {
1027       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1028       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1029       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1030       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1031     }
1032     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1033   } else {
1034     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1035     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1036     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1037     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1038   }
1039   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1040 
1041   /* load up the numerical values */
1042   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1043   cnt  = 0;
1044   for (i=0; i<a->mbs; i++) {
1045     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1046     for (j=B->i[i]; j<B->i[i+1]; j++) {
1047       if (garray[B->j[j]] > cstart) break;
1048       for (l=0; l<bs; l++) {
1049         for (ll=0; ll<bs; ll++) {
1050           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1051         }
1052       }
1053       cnt += bs;
1054     }
1055     for (k=A->i[i]; k<A->i[i+1]; k++) {
1056       for (l=0; l<bs; l++) {
1057         for (ll=0; ll<bs; ll++) {
1058           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1059         }
1060       }
1061       cnt += bs;
1062     }
1063     for (; j<B->i[i+1]; j++) {
1064       for (l=0; l<bs; l++) {
1065         for (ll=0; ll<bs; ll++) {
1066           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1067         }
1068       }
1069       cnt += bs;
1070     }
1071     cnt += (bs-1)*rlen;
1072   }
1073   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1074 
1075   /* store the column values to the file */
1076   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1077   if (!rank) {
1078     MPI_Status status;
1079     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1080     for (i=1; i<size; i++) {
1081       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1082       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1083       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1084       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1085     }
1086     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1087   } else {
1088     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1089     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1090     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1091     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1092   }
1093   ierr = PetscFree(column_values);CHKERRQ(ierr);
1094 
1095   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1096   if (file) {
1097     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1098   }
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatView_MPISBAIJ"
1104 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
1105 {
1106   PetscErrorCode ierr;
1107   PetscBool      iascii,isdraw,issocket,isbinary;
1108 
1109   PetscFunctionBegin;
1110   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1111   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1112   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1113   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1114   if (iascii || isdraw || issocket) {
1115     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1116   } else if (isbinary) {
1117     ierr = MatView_MPISBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1118   }
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatDestroy_MPISBAIJ"
1124 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
1125 {
1126   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130 #if defined(PETSC_USE_LOG)
1131   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1132 #endif
1133   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1134   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1135   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1136   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1137 #if defined(PETSC_USE_CTABLE)
1138   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1139 #else
1140   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1141 #endif
1142   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1143   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1144   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1145   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
1146   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
1147   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
1148   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
1149   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
1150   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
1151   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1152   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1153   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1154   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
1155   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
1156   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
1157 #if defined(PETSC_USE_REAL_MAT_SINGLE)
1158   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1159 #endif
1160   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
1161   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
1162   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
1170 #if defined(PETSC_HAVE_ELEMENTAL)
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
1172 #endif
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
1178 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1181   PetscErrorCode    ierr;
1182   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1183   PetscScalar       *from;
1184   const PetscScalar *x;
1185 
1186   PetscFunctionBegin;
1187   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1188   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1189 
1190   /* diagonal part */
1191   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1192   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1193 
1194   /* subdiagonal part */
1195   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1196 
1197   /* copy x into the vec slvec0 */
1198   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1199   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1200 
1201   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1202   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1203   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1204 
1205   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1206   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1207   /* supperdiagonal part */
1208   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatMult_MPISBAIJ"
1214 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1215 {
1216   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1217   PetscErrorCode    ierr;
1218   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
1219   PetscScalar       *from;
1220   const PetscScalar *x;
1221 
1222   PetscFunctionBegin;
1223   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1224   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1225 
1226   /* diagonal part */
1227   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
1228   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
1229 
1230   /* subdiagonal part */
1231   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1232 
1233   /* copy x into the vec slvec0 */
1234   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1235   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1236 
1237   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1238   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1239   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1240 
1241   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1242   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1243   /* supperdiagonal part */
1244   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1250 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1251 {
1252   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1253   PetscErrorCode ierr;
1254   PetscInt       nt;
1255 
1256   PetscFunctionBegin;
1257   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1259 
1260   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1261   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1262 
1263   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1264   /* do diagonal part */
1265   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1266   /* do supperdiagonal part */
1267   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1268   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1269   /* do subdiagonal part */
1270   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1271   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1272   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1278 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1279 {
1280   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1281   PetscErrorCode    ierr;
1282   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
1283   PetscScalar       *from,zero=0.0;
1284   const PetscScalar *x;
1285 
1286   PetscFunctionBegin;
1287   /*
1288   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
1289   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
1290   */
1291   /* diagonal part */
1292   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1293   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
1294 
1295   /* subdiagonal part */
1296   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1297 
1298   /* copy x into the vec slvec0 */
1299   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1300   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
1301   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1302   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1303 
1304   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
1306   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307 
1308   /* supperdiagonal part */
1309   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1315 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1316 {
1317   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   /* do diagonal part */
1323   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1324   /* do supperdiagonal part */
1325   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1326   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1327 
1328   /* do subdiagonal part */
1329   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1330   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1331   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 /*
1336   This only works correctly for square matrices where the subblock A->A is the
1337    diagonal block
1338 */
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1341 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1342 {
1343   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1348   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatScale_MPISBAIJ"
1354 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1355 {
1356   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1361   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1367 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1368 {
1369   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1370   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1371   PetscErrorCode ierr;
1372   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1373   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1374   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1375 
1376   PetscFunctionBegin;
1377   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1378   mat->getrowactive = PETSC_TRUE;
1379 
1380   if (!mat->rowvalues && (idx || v)) {
1381     /*
1382         allocate enough space to hold information from the longest row.
1383     */
1384     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1385     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1386     PetscInt     max = 1,mbs = mat->mbs,tmp;
1387     for (i=0; i<mbs; i++) {
1388       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1389       if (max < tmp) max = tmp;
1390     }
1391     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1392   }
1393 
1394   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1395   lrow = row - brstart;  /* local row index */
1396 
1397   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1398   if (!v)   {pvA = 0; pvB = 0;}
1399   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1400   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1401   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1402   nztot = nzA + nzB;
1403 
1404   cmap = mat->garray;
1405   if (v  || idx) {
1406     if (nztot) {
1407       /* Sort by increasing column numbers, assuming A and B already sorted */
1408       PetscInt imark = -1;
1409       if (v) {
1410         *v = v_p = mat->rowvalues;
1411         for (i=0; i<nzB; i++) {
1412           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1413           else break;
1414         }
1415         imark = i;
1416         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1417         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1418       }
1419       if (idx) {
1420         *idx = idx_p = mat->rowindices;
1421         if (imark > -1) {
1422           for (i=0; i<imark; i++) {
1423             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1424           }
1425         } else {
1426           for (i=0; i<nzB; i++) {
1427             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1428             else break;
1429           }
1430           imark = i;
1431         }
1432         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1433         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1434       }
1435     } else {
1436       if (idx) *idx = 0;
1437       if (v)   *v   = 0;
1438     }
1439   }
1440   *nz  = nztot;
1441   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1442   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1448 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1449 {
1450   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1451 
1452   PetscFunctionBegin;
1453   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1454   baij->getrowactive = PETSC_FALSE;
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1460 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1461 {
1462   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1463   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1464 
1465   PetscFunctionBegin;
1466   aA->getrow_utriangular = PETSC_TRUE;
1467   PetscFunctionReturn(0);
1468 }
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1471 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1472 {
1473   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1474   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1475 
1476   PetscFunctionBegin;
1477   aA->getrow_utriangular = PETSC_FALSE;
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1483 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1484 {
1485   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1486   PetscErrorCode ierr;
1487 
1488   PetscFunctionBegin;
1489   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1490   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1496 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1497 {
1498   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1499   PetscErrorCode ierr;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1503   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1508    Input: isrow       - distributed(parallel),
1509           iscol_local - locally owned (seq)
1510 */
1511 #undef __FUNCT__
1512 #define __FUNCT__ "ISEqual_private"
1513 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1514 {
1515   PetscErrorCode ierr;
1516   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1517   const PetscInt *ptr1,*ptr2;
1518 
1519   PetscFunctionBegin;
1520   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1521   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1522   if (sz1 > sz2) {
1523     *flg = PETSC_FALSE;
1524     PetscFunctionReturn(0);
1525   }
1526 
1527   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1528   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1529 
1530   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1531   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1532   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1533   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1534   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1535   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1536 
1537   nmatch=0;
1538   k     = 0;
1539   for (i=0; i<sz1; i++){
1540     for (j=k; j<sz2; j++){
1541       if (a1[i] == a2[j]) {
1542         k = j; nmatch++;
1543         break;
1544       }
1545     }
1546   }
1547   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1548   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1549   ierr = PetscFree(a1);CHKERRQ(ierr);
1550   ierr = PetscFree(a2);CHKERRQ(ierr);
1551   if (nmatch < sz1) {
1552     *flg = PETSC_FALSE;
1553   } else {
1554     *flg = PETSC_TRUE;
1555   }
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1561 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1562 {
1563   PetscErrorCode ierr;
1564   IS             iscol_local;
1565   PetscInt       csize;
1566   PetscBool      isequal;
1567 
1568   PetscFunctionBegin;
1569   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1570   if (call == MAT_REUSE_MATRIX) {
1571     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1572     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1573   } else {
1574     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1575     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1576     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1577   }
1578 
1579   /* now call MatGetSubMatrix_MPIBAIJ() */
1580   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1581   if (call == MAT_INITIAL_MATRIX) {
1582     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1583     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1590 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1591 {
1592   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1593   PetscErrorCode ierr;
1594 
1595   PetscFunctionBegin;
1596   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1597   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1598   PetscFunctionReturn(0);
1599 }
1600 
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1603 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1604 {
1605   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1606   Mat            A  = a->A,B = a->B;
1607   PetscErrorCode ierr;
1608   PetscReal      isend[5],irecv[5];
1609 
1610   PetscFunctionBegin;
1611   info->block_size = (PetscReal)matin->rmap->bs;
1612 
1613   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1614 
1615   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1616   isend[3] = info->memory;  isend[4] = info->mallocs;
1617 
1618   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1619 
1620   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1621   isend[3] += info->memory;  isend[4] += info->mallocs;
1622   if (flag == MAT_LOCAL) {
1623     info->nz_used      = isend[0];
1624     info->nz_allocated = isend[1];
1625     info->nz_unneeded  = isend[2];
1626     info->memory       = isend[3];
1627     info->mallocs      = isend[4];
1628   } else if (flag == MAT_GLOBAL_MAX) {
1629     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1630 
1631     info->nz_used      = irecv[0];
1632     info->nz_allocated = irecv[1];
1633     info->nz_unneeded  = irecv[2];
1634     info->memory       = irecv[3];
1635     info->mallocs      = irecv[4];
1636   } else if (flag == MAT_GLOBAL_SUM) {
1637     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1638 
1639     info->nz_used      = irecv[0];
1640     info->nz_allocated = irecv[1];
1641     info->nz_unneeded  = irecv[2];
1642     info->memory       = irecv[3];
1643     info->mallocs      = irecv[4];
1644   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1645   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1646   info->fill_ratio_needed = 0;
1647   info->factor_mallocs    = 0;
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1653 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1654 {
1655   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1656   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1657   PetscErrorCode ierr;
1658 
1659   PetscFunctionBegin;
1660   switch (op) {
1661   case MAT_NEW_NONZERO_LOCATIONS:
1662   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1663   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1664   case MAT_KEEP_NONZERO_PATTERN:
1665   case MAT_SUBMAT_SINGLEIS:
1666   case MAT_NEW_NONZERO_LOCATION_ERR:
1667     MatCheckPreallocated(A,1);
1668     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1669     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1670     break;
1671   case MAT_ROW_ORIENTED:
1672     MatCheckPreallocated(A,1);
1673     a->roworiented = flg;
1674 
1675     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1676     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1677     break;
1678   case MAT_NEW_DIAGONALS:
1679     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1680     break;
1681   case MAT_IGNORE_OFF_PROC_ENTRIES:
1682     a->donotstash = flg;
1683     break;
1684   case MAT_USE_HASH_TABLE:
1685     a->ht_flag = flg;
1686     break;
1687   case MAT_HERMITIAN:
1688     MatCheckPreallocated(A,1);
1689     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1690     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1691 
1692     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1693     break;
1694   case MAT_SPD:
1695     A->spd_set = PETSC_TRUE;
1696     A->spd     = flg;
1697     if (flg) {
1698       A->symmetric                  = PETSC_TRUE;
1699       A->structurally_symmetric     = PETSC_TRUE;
1700       A->symmetric_set              = PETSC_TRUE;
1701       A->structurally_symmetric_set = PETSC_TRUE;
1702     }
1703     break;
1704   case MAT_SYMMETRIC:
1705     MatCheckPreallocated(A,1);
1706     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1707     break;
1708   case MAT_STRUCTURALLY_SYMMETRIC:
1709     MatCheckPreallocated(A,1);
1710     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1711     break;
1712   case MAT_SYMMETRY_ETERNAL:
1713     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1714     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1715     break;
1716   case MAT_IGNORE_LOWER_TRIANGULAR:
1717     aA->ignore_ltriangular = flg;
1718     break;
1719   case MAT_ERROR_LOWER_TRIANGULAR:
1720     aA->ignore_ltriangular = flg;
1721     break;
1722   case MAT_GETROW_UPPERTRIANGULAR:
1723     aA->getrow_utriangular = flg;
1724     break;
1725   default:
1726     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1727   }
1728   PetscFunctionReturn(0);
1729 }
1730 
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1733 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1734 {
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   if (reuse == MAT_INITIAL_MATRIX) {
1739     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1740   }  else if (reuse == MAT_REUSE_MATRIX) {
1741     ierr = MatCopy(A,*B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
1742   }
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1748 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1749 {
1750   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1751   Mat            a     = baij->A, b=baij->B;
1752   PetscErrorCode ierr;
1753   PetscInt       nv,m,n;
1754   PetscBool      flg;
1755 
1756   PetscFunctionBegin;
1757   if (ll != rr) {
1758     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1759     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1760   }
1761   if (!ll) PetscFunctionReturn(0);
1762 
1763   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1764   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1765 
1766   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1767   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1768 
1769   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1770 
1771   /* left diagonalscale the off-diagonal part */
1772   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1773 
1774   /* scale the diagonal part */
1775   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1776 
1777   /* right diagonalscale the off-diagonal part */
1778   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1779   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1785 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1786 {
1787   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatEqual_MPISBAIJ"
1799 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1800 {
1801   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1802   Mat            a,b,c,d;
1803   PetscBool      flg;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   a = matA->A; b = matA->B;
1808   c = matB->A; d = matB->B;
1809 
1810   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1811   if (flg) {
1812     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1813   }
1814   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1815   PetscFunctionReturn(0);
1816 }
1817 
1818 #undef __FUNCT__
1819 #define __FUNCT__ "MatCopy_MPISBAIJ"
1820 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1821 {
1822   PetscErrorCode ierr;
1823   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1824   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1825 
1826   PetscFunctionBegin;
1827   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1828   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1829     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1830     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1831     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1832   } else {
1833     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1834     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1835   }
1836   PetscFunctionReturn(0);
1837 }
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1841 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1842 {
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1852 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1853 {
1854   PetscErrorCode ierr;
1855   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1856   PetscBLASInt   bnz,one=1;
1857   Mat_SeqSBAIJ   *xa,*ya;
1858   Mat_SeqBAIJ    *xb,*yb;
1859 
1860   PetscFunctionBegin;
1861   if (str == SAME_NONZERO_PATTERN) {
1862     PetscScalar alpha = a;
1863     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1864     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1865     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1866     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1867     xb   = (Mat_SeqBAIJ*)xx->B->data;
1868     yb   = (Mat_SeqBAIJ*)yy->B->data;
1869     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1870     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1871     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1872   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1873     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1874     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1875     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1876   } else {
1877     Mat      B;
1878     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1879     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1880     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1881     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1882     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1883     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1884     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1885     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1886     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1887     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1888     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1889     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1890     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1891     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1892     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1893     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
1894     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1895     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1896     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1897     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1904 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1905 {
1906   PetscErrorCode ierr;
1907   PetscInt       i;
1908   PetscBool      flg;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr); /* B[] are sbaij matrices */
1912   for (i=0; i<n; i++) {
1913     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1914     if (!flg) {
1915       ierr = MatSeqSBAIJZeroOps_Private(*B[i]);CHKERRQ(ierr);
1916     }
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatShift_MPISBAIJ"
1923 PetscErrorCode MatShift_MPISBAIJ(Mat Y,PetscScalar a)
1924 {
1925   PetscErrorCode ierr;
1926   Mat_MPISBAIJ    *maij = (Mat_MPISBAIJ*)Y->data;
1927   Mat_SeqSBAIJ    *aij = (Mat_SeqSBAIJ*)maij->A->data;
1928 
1929   PetscFunctionBegin;
1930   if (!Y->preallocated) {
1931     ierr = MatMPISBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
1932   } else if (!aij->nz) {
1933     PetscInt nonew = aij->nonew;
1934     ierr = MatSeqSBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
1935     aij->nonew = nonew;
1936   }
1937   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatMissingDiagonal_MPISBAIJ"
1943 PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1944 {
1945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
1950   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
1951   if (d) {
1952     PetscInt rstart;
1953     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1954     *d += rstart/A->rmap->bs;
1955 
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1962 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1963 {
1964   PetscFunctionBegin;
1965   *a = ((Mat_MPISBAIJ*)A->data)->A;
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 /* -------------------------------------------------------------------*/
1970 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1971                                        MatGetRow_MPISBAIJ,
1972                                        MatRestoreRow_MPISBAIJ,
1973                                        MatMult_MPISBAIJ,
1974                                /*  4*/ MatMultAdd_MPISBAIJ,
1975                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1976                                        MatMultAdd_MPISBAIJ,
1977                                        0,
1978                                        0,
1979                                        0,
1980                                /* 10*/ 0,
1981                                        0,
1982                                        0,
1983                                        MatSOR_MPISBAIJ,
1984                                        MatTranspose_MPISBAIJ,
1985                                /* 15*/ MatGetInfo_MPISBAIJ,
1986                                        MatEqual_MPISBAIJ,
1987                                        MatGetDiagonal_MPISBAIJ,
1988                                        MatDiagonalScale_MPISBAIJ,
1989                                        MatNorm_MPISBAIJ,
1990                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1991                                        MatAssemblyEnd_MPISBAIJ,
1992                                        MatSetOption_MPISBAIJ,
1993                                        MatZeroEntries_MPISBAIJ,
1994                                /* 24*/ 0,
1995                                        0,
1996                                        0,
1997                                        0,
1998                                        0,
1999                                /* 29*/ MatSetUp_MPISBAIJ,
2000                                        0,
2001                                        0,
2002                                        MatGetDiagonalBlock_MPISBAIJ,
2003                                        0,
2004                                /* 34*/ MatDuplicate_MPISBAIJ,
2005                                        0,
2006                                        0,
2007                                        0,
2008                                        0,
2009                                /* 39*/ MatAXPY_MPISBAIJ,
2010                                        MatGetSubMatrices_MPISBAIJ,
2011                                        MatIncreaseOverlap_MPISBAIJ,
2012                                        MatGetValues_MPISBAIJ,
2013                                        MatCopy_MPISBAIJ,
2014                                /* 44*/ 0,
2015                                        MatScale_MPISBAIJ,
2016                                        MatShift_MPISBAIJ,
2017                                        0,
2018                                        0,
2019                                /* 49*/ 0,
2020                                        0,
2021                                        0,
2022                                        0,
2023                                        0,
2024                                /* 54*/ 0,
2025                                        0,
2026                                        MatSetUnfactored_MPISBAIJ,
2027                                        0,
2028                                        MatSetValuesBlocked_MPISBAIJ,
2029                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
2030                                        0,
2031                                        0,
2032                                        0,
2033                                        0,
2034                                /* 64*/ 0,
2035                                        0,
2036                                        0,
2037                                        0,
2038                                        0,
2039                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
2040                                        0,
2041                                        0,
2042                                        0,
2043                                        0,
2044                                /* 74*/ 0,
2045                                        0,
2046                                        0,
2047                                        0,
2048                                        0,
2049                                /* 79*/ 0,
2050                                        0,
2051                                        0,
2052                                        0,
2053                                        MatLoad_MPISBAIJ,
2054                                /* 84*/ 0,
2055                                        0,
2056                                        0,
2057                                        0,
2058                                        0,
2059                                /* 89*/ 0,
2060                                        0,
2061                                        0,
2062                                        0,
2063                                        0,
2064                                /* 94*/ 0,
2065                                        0,
2066                                        0,
2067                                        0,
2068                                        0,
2069                                /* 99*/ 0,
2070                                        0,
2071                                        0,
2072                                        0,
2073                                        0,
2074                                /*104*/ 0,
2075                                        MatRealPart_MPISBAIJ,
2076                                        MatImaginaryPart_MPISBAIJ,
2077                                        MatGetRowUpperTriangular_MPISBAIJ,
2078                                        MatRestoreRowUpperTriangular_MPISBAIJ,
2079                                /*109*/ 0,
2080                                        0,
2081                                        0,
2082                                        0,
2083                                        MatMissingDiagonal_MPISBAIJ,
2084                                /*114*/ 0,
2085                                        0,
2086                                        0,
2087                                        0,
2088                                        0,
2089                                /*119*/ 0,
2090                                        0,
2091                                        0,
2092                                        0,
2093                                        0,
2094                                /*124*/ 0,
2095                                        0,
2096                                        0,
2097                                        0,
2098                                        0,
2099                                /*129*/ 0,
2100                                        0,
2101                                        0,
2102                                        0,
2103                                        0,
2104                                /*134*/ 0,
2105                                        0,
2106                                        0,
2107                                        0,
2108                                        0,
2109                                /*139*/ MatSetBlockSizes_Default,
2110                                        0,
2111                                        0,
2112                                        0,
2113                                        0,
2114                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
2115 };
2116 
2117 #undef __FUNCT__
2118 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
2119 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2120 {
2121   Mat_MPISBAIJ   *b;
2122   PetscErrorCode ierr;
2123   PetscInt       i,mbs,Mbs;
2124 
2125   PetscFunctionBegin;
2126   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2127   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2128   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2129   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2130 
2131   b   = (Mat_MPISBAIJ*)B->data;
2132   mbs = B->rmap->n/bs;
2133   Mbs = B->rmap->N/bs;
2134   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
2135 
2136   B->rmap->bs = bs;
2137   b->bs2      = bs*bs;
2138   b->mbs      = mbs;
2139   b->Mbs      = Mbs;
2140   b->nbs      = B->cmap->n/bs;
2141   b->Nbs      = B->cmap->N/bs;
2142 
2143   for (i=0; i<=b->size; i++) {
2144     b->rangebs[i] = B->rmap->range[i]/bs;
2145   }
2146   b->rstartbs = B->rmap->rstart/bs;
2147   b->rendbs   = B->rmap->rend/bs;
2148 
2149   b->cstartbs = B->cmap->rstart/bs;
2150   b->cendbs   = B->cmap->rend/bs;
2151 
2152 #if defined(PETSC_USE_CTABLE)
2153   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2154 #else
2155   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2156 #endif
2157   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2158   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2159   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2160   ierr = VecDestroy(&b->slvec0);CHKERRQ(ierr);
2161   ierr = VecDestroy(&b->slvec0b);CHKERRQ(ierr);
2162   ierr = VecDestroy(&b->slvec1);CHKERRQ(ierr);
2163   ierr = VecDestroy(&b->slvec1a);CHKERRQ(ierr);
2164   ierr = VecDestroy(&b->slvec1b);CHKERRQ(ierr);
2165   ierr = VecScatterDestroy(&b->sMvctx);CHKERRQ(ierr);
2166 
2167   /* Because the B will have been resized we simply destroy it and create a new one each time */
2168   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2169   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2170   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2171   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2172   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2173 
2174   if (!B->preallocated) {
2175     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2176     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2177     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
2178     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2179     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2180   }
2181 
2182   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2183   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2184 
2185   B->preallocated  = PETSC_TRUE;
2186   B->was_assembled = PETSC_FALSE;
2187   B->assembled     = PETSC_FALSE;
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
2193 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2194 {
2195   PetscInt       m,rstart,cstart,cend;
2196   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2197   const PetscInt *JJ    =0;
2198   PetscScalar    *values=0;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2203   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2204   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2205   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2206   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2207   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2208   m      = B->rmap->n/bs;
2209   rstart = B->rmap->rstart/bs;
2210   cstart = B->cmap->rstart/bs;
2211   cend   = B->cmap->rend/bs;
2212 
2213   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2214   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2215   for (i=0; i<m; i++) {
2216     nz = ii[i+1] - ii[i];
2217     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2218     nz_max = PetscMax(nz_max,nz);
2219     JJ     = jj + ii[i];
2220     for (j=0; j<nz; j++) {
2221       if (*JJ >= cstart) break;
2222       JJ++;
2223     }
2224     d = 0;
2225     for (; j<nz; j++) {
2226       if (*JJ++ >= cend) break;
2227       d++;
2228     }
2229     d_nnz[i] = d;
2230     o_nnz[i] = nz - d;
2231   }
2232   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2233   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2234 
2235   values = (PetscScalar*)V;
2236   if (!values) {
2237     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2238     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2239   }
2240   for (i=0; i<m; i++) {
2241     PetscInt          row    = i + rstart;
2242     PetscInt          ncols  = ii[i+1] - ii[i];
2243     const PetscInt    *icols = jj + ii[i];
2244     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2245     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2246   }
2247 
2248   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2249   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2250   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 /*MC
2256    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2257    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
2258    the matrix is stored.
2259 
2260   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2261   can call MatSetOption(Mat, MAT_HERMITIAN);
2262 
2263    Options Database Keys:
2264 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
2265 
2266   Level: beginner
2267 
2268 .seealso: MatCreateMPISBAIJ
2269 M*/
2270 
2271 PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatCreate_MPISBAIJ"
2275 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2276 {
2277   Mat_MPISBAIJ   *b;
2278   PetscErrorCode ierr;
2279   PetscBool      flg = PETSC_FALSE;
2280 
2281   PetscFunctionBegin;
2282   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2283   B->data = (void*)b;
2284   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2285 
2286   B->ops->destroy = MatDestroy_MPISBAIJ;
2287   B->ops->view    = MatView_MPISBAIJ;
2288   B->assembled    = PETSC_FALSE;
2289   B->insertmode   = NOT_SET_VALUES;
2290 
2291   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2292   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2293 
2294   /* build local table of row and column ownerships */
2295   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
2296 
2297   /* build cache for off array entries formed */
2298   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2299 
2300   b->donotstash  = PETSC_FALSE;
2301   b->colmap      = NULL;
2302   b->garray      = NULL;
2303   b->roworiented = PETSC_TRUE;
2304 
2305   /* stuff used in block assembly */
2306   b->barray = 0;
2307 
2308   /* stuff used for matrix vector multiply */
2309   b->lvec    = 0;
2310   b->Mvctx   = 0;
2311   b->slvec0  = 0;
2312   b->slvec0b = 0;
2313   b->slvec1  = 0;
2314   b->slvec1a = 0;
2315   b->slvec1b = 0;
2316   b->sMvctx  = 0;
2317 
2318   /* stuff for MatGetRow() */
2319   b->rowindices   = 0;
2320   b->rowvalues    = 0;
2321   b->getrowactive = PETSC_FALSE;
2322 
2323   /* hash table stuff */
2324   b->ht           = 0;
2325   b->hd           = 0;
2326   b->ht_size      = 0;
2327   b->ht_flag      = PETSC_FALSE;
2328   b->ht_fact      = 0;
2329   b->ht_total_ct  = 0;
2330   b->ht_insert_ct = 0;
2331 
2332   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2333   b->ijonly = PETSC_FALSE;
2334 
2335   b->in_loc = 0;
2336   b->v_loc  = 0;
2337   b->n_loc  = 0;
2338 
2339   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2340   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2341   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
2342   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
2343 #if defined(PETSC_HAVE_ELEMENTAL)
2344   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
2345 #endif
2346 
2347   B->symmetric                  = PETSC_TRUE;
2348   B->structurally_symmetric     = PETSC_TRUE;
2349   B->symmetric_set              = PETSC_TRUE;
2350   B->structurally_symmetric_set = PETSC_TRUE;
2351 
2352   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
2353   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
2354   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
2355   if (flg) {
2356     PetscReal fact = 1.39;
2357     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2358     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2359     if (fact <= 1.0) fact = 1.39;
2360     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2361     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2362   }
2363   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 /*MC
2368    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2369 
2370    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
2371    and MATMPISBAIJ otherwise.
2372 
2373    Options Database Keys:
2374 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
2375 
2376   Level: beginner
2377 
2378 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
2379 M*/
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
2383 /*@C
2384    MatMPISBAIJSetPreallocation - For good matrix assembly performance
2385    the user should preallocate the matrix storage by setting the parameters
2386    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2387    performance can be increased by more than a factor of 50.
2388 
2389    Collective on Mat
2390 
2391    Input Parameters:
2392 +  B - the matrix
2393 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2394           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2395 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2396            submatrix  (same for all local rows)
2397 .  d_nnz - array containing the number of block nonzeros in the various block rows
2398            in the upper triangular and diagonal part of the in diagonal portion of the local
2399            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
2400            for the diagonal entry and set a value even if it is zero.
2401 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2402            submatrix (same for all local rows).
2403 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2404            off-diagonal portion of the local submatrix that is right of the diagonal
2405            (possibly different for each block row) or NULL.
2406 
2407 
2408    Options Database Keys:
2409 .   -mat_no_unroll - uses code that does not unroll the loops in the
2410                      block calculations (much slower)
2411 .   -mat_block_size - size of the blocks to use
2412 
2413    Notes:
2414 
2415    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2416    than it must be used on all processors that share the object for that argument.
2417 
2418    If the *_nnz parameter is given then the *_nz parameter is ignored
2419 
2420    Storage Information:
2421    For a square global matrix we define each processor's diagonal portion
2422    to be its local rows and the corresponding columns (a square submatrix);
2423    each processor's off-diagonal portion encompasses the remainder of the
2424    local matrix (a rectangular submatrix).
2425 
2426    The user can specify preallocated storage for the diagonal part of
2427    the local submatrix with either d_nz or d_nnz (not both).  Set
2428    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2429    memory allocation.  Likewise, specify preallocated storage for the
2430    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2431 
2432    You can call MatGetInfo() to get information on how effective the preallocation was;
2433    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2434    You can also run with the option -info and look for messages with the string
2435    malloc in them to see if additional memory allocation was needed.
2436 
2437    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2438    the figure below we depict these three local rows and all columns (0-11).
2439 
2440 .vb
2441            0 1 2 3 4 5 6 7 8 9 10 11
2442           --------------------------
2443    row 3  |. . . d d d o o o o  o  o
2444    row 4  |. . . d d d o o o o  o  o
2445    row 5  |. . . d d d o o o o  o  o
2446           --------------------------
2447 .ve
2448 
2449    Thus, any entries in the d locations are stored in the d (diagonal)
2450    submatrix, and any entries in the o locations are stored in the
2451    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2452    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2453 
2454    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2455    plus the diagonal part of the d matrix,
2456    and o_nz should indicate the number of block nonzeros per row in the o matrix
2457 
2458    In general, for PDE problems in which most nonzeros are near the diagonal,
2459    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2460    or you will get TERRIBLE performance; see the users' manual chapter on
2461    matrices.
2462 
2463    Level: intermediate
2464 
2465 .keywords: matrix, block, aij, compressed row, sparse, parallel
2466 
2467 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2468 @*/
2469 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2470 {
2471   PetscErrorCode ierr;
2472 
2473   PetscFunctionBegin;
2474   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2475   PetscValidType(B,1);
2476   PetscValidLogicalCollectiveInt(B,bs,2);
2477   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatCreateSBAIJ"
2483 /*@C
2484    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2485    (block compressed row).  For good matrix assembly performance
2486    the user should preallocate the matrix storage by setting the parameters
2487    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2488    performance can be increased by more than a factor of 50.
2489 
2490    Collective on MPI_Comm
2491 
2492    Input Parameters:
2493 +  comm - MPI communicator
2494 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2495           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2496 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2497            This value should be the same as the local size used in creating the
2498            y vector for the matrix-vector product y = Ax.
2499 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2500            This value should be the same as the local size used in creating the
2501            x vector for the matrix-vector product y = Ax.
2502 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2503 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2504 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2505            submatrix  (same for all local rows)
2506 .  d_nnz - array containing the number of block nonzeros in the various block rows
2507            in the upper triangular portion of the in diagonal portion of the local
2508            (possibly different for each block block row) or NULL.
2509            If you plan to factor the matrix you must leave room for the diagonal entry and
2510            set its value even if it is zero.
2511 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2512            submatrix (same for all local rows).
2513 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2514            off-diagonal portion of the local submatrix (possibly different for
2515            each block row) or NULL.
2516 
2517    Output Parameter:
2518 .  A - the matrix
2519 
2520    Options Database Keys:
2521 .   -mat_no_unroll - uses code that does not unroll the loops in the
2522                      block calculations (much slower)
2523 .   -mat_block_size - size of the blocks to use
2524 .   -mat_mpi - use the parallel matrix data structures even on one processor
2525                (defaults to using SeqBAIJ format on one processor)
2526 
2527    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2528    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2529    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2530 
2531    Notes:
2532    The number of rows and columns must be divisible by blocksize.
2533    This matrix type does not support complex Hermitian operation.
2534 
2535    The user MUST specify either the local or global matrix dimensions
2536    (possibly both).
2537 
2538    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2539    than it must be used on all processors that share the object for that argument.
2540 
2541    If the *_nnz parameter is given then the *_nz parameter is ignored
2542 
2543    Storage Information:
2544    For a square global matrix we define each processor's diagonal portion
2545    to be its local rows and the corresponding columns (a square submatrix);
2546    each processor's off-diagonal portion encompasses the remainder of the
2547    local matrix (a rectangular submatrix).
2548 
2549    The user can specify preallocated storage for the diagonal part of
2550    the local submatrix with either d_nz or d_nnz (not both).  Set
2551    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2552    memory allocation.  Likewise, specify preallocated storage for the
2553    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2554 
2555    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2556    the figure below we depict these three local rows and all columns (0-11).
2557 
2558 .vb
2559            0 1 2 3 4 5 6 7 8 9 10 11
2560           --------------------------
2561    row 3  |. . . d d d o o o o  o  o
2562    row 4  |. . . d d d o o o o  o  o
2563    row 5  |. . . d d d o o o o  o  o
2564           --------------------------
2565 .ve
2566 
2567    Thus, any entries in the d locations are stored in the d (diagonal)
2568    submatrix, and any entries in the o locations are stored in the
2569    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2570    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2571 
2572    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2573    plus the diagonal part of the d matrix,
2574    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2575    In general, for PDE problems in which most nonzeros are near the diagonal,
2576    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2577    or you will get TERRIBLE performance; see the users' manual chapter on
2578    matrices.
2579 
2580    Level: intermediate
2581 
2582 .keywords: matrix, block, aij, compressed row, sparse, parallel
2583 
2584 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2585 @*/
2586 
2587 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2588 {
2589   PetscErrorCode ierr;
2590   PetscMPIInt    size;
2591 
2592   PetscFunctionBegin;
2593   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2594   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2596   if (size > 1) {
2597     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2598     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2599   } else {
2600     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2601     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2602   }
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2609 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2610 {
2611   Mat            mat;
2612   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2613   PetscErrorCode ierr;
2614   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2615   PetscScalar    *array;
2616 
2617   PetscFunctionBegin;
2618   *newmat = 0;
2619 
2620   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2621   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2622   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2623   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2624   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2625   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2626 
2627   mat->factortype   = matin->factortype;
2628   mat->preallocated = PETSC_TRUE;
2629   mat->assembled    = PETSC_TRUE;
2630   mat->insertmode   = NOT_SET_VALUES;
2631 
2632   a      = (Mat_MPISBAIJ*)mat->data;
2633   a->bs2 = oldmat->bs2;
2634   a->mbs = oldmat->mbs;
2635   a->nbs = oldmat->nbs;
2636   a->Mbs = oldmat->Mbs;
2637   a->Nbs = oldmat->Nbs;
2638 
2639 
2640   a->size         = oldmat->size;
2641   a->rank         = oldmat->rank;
2642   a->donotstash   = oldmat->donotstash;
2643   a->roworiented  = oldmat->roworiented;
2644   a->rowindices   = 0;
2645   a->rowvalues    = 0;
2646   a->getrowactive = PETSC_FALSE;
2647   a->barray       = 0;
2648   a->rstartbs     = oldmat->rstartbs;
2649   a->rendbs       = oldmat->rendbs;
2650   a->cstartbs     = oldmat->cstartbs;
2651   a->cendbs       = oldmat->cendbs;
2652 
2653   /* hash table stuff */
2654   a->ht           = 0;
2655   a->hd           = 0;
2656   a->ht_size      = 0;
2657   a->ht_flag      = oldmat->ht_flag;
2658   a->ht_fact      = oldmat->ht_fact;
2659   a->ht_total_ct  = 0;
2660   a->ht_insert_ct = 0;
2661 
2662   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2663   if (oldmat->colmap) {
2664 #if defined(PETSC_USE_CTABLE)
2665     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2666 #else
2667     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2668     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2669     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2670 #endif
2671   } else a->colmap = 0;
2672 
2673   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2674     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2675     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2676     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2677   } else a->garray = 0;
2678 
2679   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2680   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2681   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2682   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2683   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2684 
2685   ierr = VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2686   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2687   ierr = VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2688   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2689 
2690   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2691   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2692   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2693   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2694   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2695   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2696   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2697   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2698   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2699   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2700   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2701   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2702   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2703 
2704   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2705   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2706   a->sMvctx = oldmat->sMvctx;
2707   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2708 
2709   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2710   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2711   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2712   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2713   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2714   *newmat = mat;
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatLoad_MPISBAIJ"
2720 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2721 {
2722   PetscErrorCode ierr;
2723   PetscInt       i,nz,j,rstart,rend;
2724   PetscScalar    *vals,*buf;
2725   MPI_Comm       comm;
2726   MPI_Status     status;
2727   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2728   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2729   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2730   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2731   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2732   PetscInt       dcount,kmax,k,nzcount,tmp;
2733   int            fd;
2734 
2735   PetscFunctionBegin;
2736   /* force binary viewer to load .info file if it has not yet done so */
2737   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2738   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2739   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2740   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2741   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2742   if (bs < 0) bs = 1;
2743 
2744   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2745   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2746   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2747   if (!rank) {
2748     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2749     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2750     if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2751   }
2752 
2753   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2754   M    = header[1];
2755   N    = header[2];
2756 
2757   /* If global sizes are set, check if they are consistent with that given in the file */
2758   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2759   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2760 
2761   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2762 
2763   /*
2764      This code adds extra rows to make sure the number of rows is
2765      divisible by the blocksize
2766   */
2767   Mbs        = M/bs;
2768   extra_rows = bs - M + bs*(Mbs);
2769   if (extra_rows == bs) extra_rows = 0;
2770   else                  Mbs++;
2771   if (extra_rows &&!rank) {
2772     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2773   }
2774 
2775   /* determine ownership of all rows */
2776   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2777     mbs = Mbs/size + ((Mbs % size) > rank);
2778     m   = mbs*bs;
2779   } else { /* User Set */
2780     m   = newmat->rmap->n;
2781     mbs = m/bs;
2782   }
2783   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2784   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2785   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2786   rowners[0] = 0;
2787   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2788   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2789   rstart = rowners[rank];
2790   rend   = rowners[rank+1];
2791 
2792   /* distribute row lengths to all processors */
2793   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2794   if (!rank) {
2795     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2796     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2797     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2798     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2799     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2800     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2801     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2802   } else {
2803     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2804   }
2805 
2806   if (!rank) {   /* procs[0] */
2807     /* calculate the number of nonzeros on each processor */
2808     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2809     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2810     for (i=0; i<size; i++) {
2811       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2812         procsnz[i] += rowlengths[j];
2813       }
2814     }
2815     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2816 
2817     /* determine max buffer needed and allocate it */
2818     maxnz = 0;
2819     for (i=0; i<size; i++) {
2820       maxnz = PetscMax(maxnz,procsnz[i]);
2821     }
2822     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2823 
2824     /* read in my part of the matrix column indices  */
2825     nz     = procsnz[0];
2826     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2827     mycols = ibuf;
2828     if (size == 1) nz -= extra_rows;
2829     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2830     if (size == 1) {
2831       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2832     }
2833 
2834     /* read in every ones (except the last) and ship off */
2835     for (i=1; i<size-1; i++) {
2836       nz   = procsnz[i];
2837       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2838       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2839     }
2840     /* read in the stuff for the last proc */
2841     if (size != 1) {
2842       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2843       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2844       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2845       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2846     }
2847     ierr = PetscFree(cols);CHKERRQ(ierr);
2848   } else {  /* procs[i], i>0 */
2849     /* determine buffer space needed for message */
2850     nz = 0;
2851     for (i=0; i<m; i++) nz += locrowlens[i];
2852     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2853     mycols = ibuf;
2854     /* receive message of column indices*/
2855     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2856     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2857     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2858   }
2859 
2860   /* loop over local rows, determining number of off diagonal entries */
2861   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2862   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2863   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2864   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2865   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2866   rowcount = 0;
2867   nzcount  = 0;
2868   for (i=0; i<mbs; i++) {
2869     dcount  = 0;
2870     odcount = 0;
2871     for (j=0; j<bs; j++) {
2872       kmax = locrowlens[rowcount];
2873       for (k=0; k<kmax; k++) {
2874         tmp = mycols[nzcount++]/bs; /* block col. index */
2875         if (!mask[tmp]) {
2876           mask[tmp] = 1;
2877           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2878           else masked1[dcount++] = tmp; /* entry in diag portion */
2879         }
2880       }
2881       rowcount++;
2882     }
2883 
2884     dlens[i]  = dcount;  /* d_nzz[i] */
2885     odlens[i] = odcount; /* o_nzz[i] */
2886 
2887     /* zero out the mask elements we set */
2888     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2889     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2890   }
2891   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2892   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2893   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2894 
2895   if (!rank) {
2896     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2897     /* read in my part of the matrix numerical values  */
2898     nz     = procsnz[0];
2899     vals   = buf;
2900     mycols = ibuf;
2901     if (size == 1) nz -= extra_rows;
2902     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2903     if (size == 1) {
2904       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2905     }
2906 
2907     /* insert into matrix */
2908     jj = rstart*bs;
2909     for (i=0; i<m; i++) {
2910       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2911       mycols += locrowlens[i];
2912       vals   += locrowlens[i];
2913       jj++;
2914     }
2915 
2916     /* read in other processors (except the last one) and ship out */
2917     for (i=1; i<size-1; i++) {
2918       nz   = procsnz[i];
2919       vals = buf;
2920       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2921       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2922     }
2923     /* the last proc */
2924     if (size != 1) {
2925       nz   = procsnz[i] - extra_rows;
2926       vals = buf;
2927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2929       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2930     }
2931     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2932 
2933   } else {
2934     /* receive numeric values */
2935     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2936 
2937     /* receive message of values*/
2938     vals   = buf;
2939     mycols = ibuf;
2940     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2941     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2942     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2943 
2944     /* insert into matrix */
2945     jj = rstart*bs;
2946     for (i=0; i<m; i++) {
2947       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2948       mycols += locrowlens[i];
2949       vals   += locrowlens[i];
2950       jj++;
2951     }
2952   }
2953 
2954   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2955   ierr = PetscFree(buf);CHKERRQ(ierr);
2956   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2957   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2958   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2959   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2960   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2967 /*XXXXX@
2968    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2969 
2970    Input Parameters:
2971 .  mat  - the matrix
2972 .  fact - factor
2973 
2974    Not Collective on Mat, each process can have a different hash factor
2975 
2976    Level: advanced
2977 
2978   Notes:
2979    This can also be set by the command line option: -mat_use_hash_table fact
2980 
2981 .keywords: matrix, hashtable, factor, HT
2982 
2983 .seealso: MatSetOption()
2984 @XXXXX*/
2985 
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2989 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2990 {
2991   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2992   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2993   PetscReal      atmp;
2994   PetscReal      *work,*svalues,*rvalues;
2995   PetscErrorCode ierr;
2996   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2997   PetscMPIInt    rank,size;
2998   PetscInt       *rowners_bs,dest,count,source;
2999   PetscScalar    *va;
3000   MatScalar      *ba;
3001   MPI_Status     stat;
3002 
3003   PetscFunctionBegin;
3004   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
3005   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
3006   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3007 
3008   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
3009   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
3010 
3011   bs  = A->rmap->bs;
3012   mbs = a->mbs;
3013   Mbs = a->Mbs;
3014   ba  = b->a;
3015   bi  = b->i;
3016   bj  = b->j;
3017 
3018   /* find ownerships */
3019   rowners_bs = A->rmap->range;
3020 
3021   /* each proc creates an array to be distributed */
3022   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
3023   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
3024 
3025   /* row_max for B */
3026   if (rank != size-1) {
3027     for (i=0; i<mbs; i++) {
3028       ncols = bi[1] - bi[0]; bi++;
3029       brow  = bs*i;
3030       for (j=0; j<ncols; j++) {
3031         bcol = bs*(*bj);
3032         for (kcol=0; kcol<bs; kcol++) {
3033           col  = bcol + kcol;                /* local col index */
3034           col += rowners_bs[rank+1];      /* global col index */
3035           for (krow=0; krow<bs; krow++) {
3036             atmp = PetscAbsScalar(*ba); ba++;
3037             row  = brow + krow;   /* local row index */
3038             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
3039             if (work[col] < atmp) work[col] = atmp;
3040           }
3041         }
3042         bj++;
3043       }
3044     }
3045 
3046     /* send values to its owners */
3047     for (dest=rank+1; dest<size; dest++) {
3048       svalues = work + rowners_bs[dest];
3049       count   = rowners_bs[dest+1]-rowners_bs[dest];
3050       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
3051     }
3052   }
3053 
3054   /* receive values */
3055   if (rank) {
3056     rvalues = work;
3057     count   = rowners_bs[rank+1]-rowners_bs[rank];
3058     for (source=0; source<rank; source++) {
3059       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
3060       /* process values */
3061       for (i=0; i<count; i++) {
3062         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
3063       }
3064     }
3065   }
3066 
3067   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3068   ierr = PetscFree(work);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatSOR_MPISBAIJ"
3074 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3075 {
3076   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
3077   PetscErrorCode    ierr;
3078   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
3079   PetscScalar       *x,*ptr,*from;
3080   Vec               bb1;
3081   const PetscScalar *b;
3082 
3083   PetscFunctionBegin;
3084   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3085   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3086 
3087   if (flag == SOR_APPLY_UPPER) {
3088     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3089     PetscFunctionReturn(0);
3090   }
3091 
3092   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3093     if (flag & SOR_ZERO_INITIAL_GUESS) {
3094       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3095       its--;
3096     }
3097 
3098     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3099     while (its--) {
3100 
3101       /* lower triangular part: slvec0b = - B^T*xx */
3102       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3103 
3104       /* copy xx into slvec0a */
3105       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3106       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3107       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3108       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
3109 
3110       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
3111 
3112       /* copy bb into slvec1a */
3113       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3114       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3115       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3116       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
3117 
3118       /* set slvec1b = 0 */
3119       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3120 
3121       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3122       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3123       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3124       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3125 
3126       /* upper triangular part: bb1 = bb1 - B*x */
3127       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
3128 
3129       /* local diagonal sweep */
3130       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3131     }
3132     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3133   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3134     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3135   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
3136     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
3137   } else if (flag & SOR_EISENSTAT) {
3138     Vec               xx1;
3139     PetscBool         hasop;
3140     const PetscScalar *diag;
3141     PetscScalar       *sl,scale = (omega - 2.0)/omega;
3142     PetscInt          i,n;
3143 
3144     if (!mat->xx1) {
3145       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
3146       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
3147     }
3148     xx1 = mat->xx1;
3149     bb1 = mat->bb1;
3150 
3151     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
3152 
3153     if (!mat->diag) {
3154       /* this is wrong for same matrix with new nonzero values */
3155       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
3156       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
3157     }
3158     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
3159 
3160     if (hasop) {
3161       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
3162       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3163     } else {
3164       /*
3165           These two lines are replaced by code that may be a bit faster for a good compiler
3166       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
3167       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
3168       */
3169       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3170       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3171       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
3172       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3173       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
3174       if (omega == 1.0) {
3175         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
3176         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
3177       } else {
3178         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
3179         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
3180       }
3181       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
3182       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
3183       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
3184       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3185     }
3186 
3187     /* multiply off-diagonal portion of matrix */
3188     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
3189     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
3190     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
3191     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
3192     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
3193     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
3194     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
3195     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3196     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3197     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
3198 
3199     /* local sweep */
3200     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
3201     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
3202   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3203   PetscFunctionReturn(0);
3204 }
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
3208 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
3209 {
3210   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
3211   PetscErrorCode ierr;
3212   Vec            lvec1,bb1;
3213 
3214   PetscFunctionBegin;
3215   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
3216   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
3217 
3218   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
3219     if (flag & SOR_ZERO_INITIAL_GUESS) {
3220       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
3221       its--;
3222     }
3223 
3224     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
3225     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
3226     while (its--) {
3227       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3228 
3229       /* lower diagonal part: bb1 = bb - B^T*xx */
3230       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
3231       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
3232 
3233       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3234       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
3235       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3236 
3237       /* upper diagonal part: bb1 = bb1 - B*x */
3238       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
3239       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
3240 
3241       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
3242 
3243       /* diagonal sweep */
3244       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
3245     }
3246     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
3247     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
3248   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
3249   PetscFunctionReturn(0);
3250 }
3251 
3252 #undef __FUNCT__
3253 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
3254 /*@
3255      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
3256          CSR format the local rows.
3257 
3258    Collective on MPI_Comm
3259 
3260    Input Parameters:
3261 +  comm - MPI communicator
3262 .  bs - the block size, only a block size of 1 is supported
3263 .  m - number of local rows (Cannot be PETSC_DECIDE)
3264 .  n - This value should be the same as the local size used in creating the
3265        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3266        calculated if N is given) For square matrices n is almost always m.
3267 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3268 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3269 .   i - row indices
3270 .   j - column indices
3271 -   a - matrix values
3272 
3273    Output Parameter:
3274 .   mat - the matrix
3275 
3276    Level: intermediate
3277 
3278    Notes:
3279        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3280      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3281      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3282 
3283        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3284 
3285 .keywords: matrix, aij, compressed row, sparse, parallel
3286 
3287 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3288           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3289 @*/
3290 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3291 {
3292   PetscErrorCode ierr;
3293 
3294 
3295   PetscFunctionBegin;
3296   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3297   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3298   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3299   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3300   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3301   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
3308 /*@C
3309    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3310    (the default parallel PETSc format).
3311 
3312    Collective on MPI_Comm
3313 
3314    Input Parameters:
3315 +  B - the matrix
3316 .  bs - the block size
3317 .  i - the indices into j for the start of each local row (starts with zero)
3318 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3319 -  v - optional values in the matrix
3320 
3321    Level: developer
3322 
3323 .keywords: matrix, aij, compressed row, sparse, parallel
3324 
3325 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
3326 @*/
3327 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3328 {
3329   PetscErrorCode ierr;
3330 
3331   PetscFunctionBegin;
3332   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
3338 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3339 {
3340   PetscErrorCode ierr;
3341   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3342   PetscInt       *indx;
3343   PetscScalar    *values;
3344 
3345   PetscFunctionBegin;
3346   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3347   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3348     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
3349     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
3350     PetscInt       *bindx,rmax=a->rmax,j;
3351 
3352     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3353     mbs = m/bs; Nbs = N/cbs;
3354     if (n == PETSC_DECIDE) {
3355       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3356     }
3357     /* Check sum(n) = Nbs */
3358     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3359     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3360 
3361     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3362     rstart -= mbs;
3363 
3364     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3365     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3366     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3367     for (i=0; i<mbs; i++) {
3368       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3369       nnz = nnz/bs;
3370       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3371       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3372       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3373     }
3374     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3375     ierr = PetscFree(bindx);CHKERRQ(ierr);
3376 
3377     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3378     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3379     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3380     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
3381     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3382     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3383   }
3384 
3385   /* numeric phase */
3386   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3387   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3388 
3389   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
3390   for (i=0; i<m; i++) {
3391     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3392     Ii   = i + rstart;
3393     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3394     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3395   }
3396   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
3397   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3398   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3399   PetscFunctionReturn(0);
3400 }
3401