xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 262ecc7176e93fbdedf2c042ec9dcb340e9750eb)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #if defined(PETSC_HAVE_ELEMENTAL)
8 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
9 #endif
10 #undef __FUNCT__
11 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
12 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
13 {
14   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
15   PetscErrorCode ierr;
16 
17   PetscFunctionBegin;
18   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
19   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
20   PetscFunctionReturn(0);
21 }
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
25 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
32   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
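/* A minimal caller-side sketch for the two routines above (illustrative, kept
   out of compilation): MatStoreValues()/MatRetrieveValues() snapshot and
   restore the numerical values of a matrix whose nonzero pattern is frozen,
   e.g. with MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE). */
#if 0
static PetscErrorCode SnapshotAndRestore(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* save the current values */
  /* ... overwrite entries with MatSetValues() and reassemble ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* roll back to the snapshot */
  PetscFunctionReturn(0);
}
#endif
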
36 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
37   { \
38  \
39     brow = row/bs;  \
40     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
41     rmax = aimax[brow]; nrow = ailen[brow]; \
42     bcol = col/bs; \
43     ridx = row % bs; cidx = col % bs; \
44     low  = 0; high = nrow; \
45     while (high-low > 3) { \
46       t = (low+high)/2; \
47       if (rp[t] > bcol) high = t; \
48       else              low  = t; \
49     } \
50     for (_i=low; _i<high; _i++) { \
51       if (rp[_i] > bcol) break; \
52       if (rp[_i] == bcol) { \
53         bap = ap + bs2*_i + bs*cidx + ridx; \
54         if (addv == ADD_VALUES) *bap += value;  \
55         else                    *bap  = value;  \
56         goto a_noinsert; \
57       } \
58     } \
59     if (a->nonew == 1) goto a_noinsert; \
60     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
61     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
62     N = nrow++ - 1;  \
63     /* shift up all the later entries in this row */ \
64     for (ii=N; ii>=_i; ii--) { \
65       rp[ii+1] = rp[ii]; \
66       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
67     } \
68     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
69     rp[_i]                      = bcol;  \
70     ap[bs2*_i + bs*cidx + ridx] = value;  \
71     A->nonzerostate++;\
72 a_noinsert:; \
73     ailen[brow] = nrow; \
74   }
75 
76 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
77   { \
78     brow = row/bs;  \
79     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
80     rmax = bimax[brow]; nrow = bilen[brow]; \
81     bcol = col/bs; \
82     ridx = row % bs; cidx = col % bs; \
83     low  = 0; high = nrow; \
84     while (high-low > 3) { \
85       t = (low+high)/2; \
86       if (rp[t] > bcol) high = t; \
87       else              low  = t; \
88     } \
89     for (_i=low; _i<high; _i++) { \
90       if (rp[_i] > bcol) break; \
91       if (rp[_i] == bcol) { \
92         bap = ap + bs2*_i + bs*cidx + ridx; \
93         if (addv == ADD_VALUES) *bap += value;  \
94         else                    *bap  = value;  \
95         goto b_noinsert; \
96       } \
97     } \
98     if (b->nonew == 1) goto b_noinsert; \
99     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
100     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
101     N = nrow++ - 1;  \
102     /* shift up all the later entries in this row */ \
103     for (ii=N; ii>=_i; ii--) { \
104       rp[ii+1] = rp[ii]; \
105       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
106     } \
107     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
108     rp[_i]                      = bcol;  \
109     ap[bs2*_i + bs*cidx + ridx] = value;  \
110     B->nonzerostate++;\
111 b_noinsert:; \
112     bilen[brow] = nrow; \
113   }
114 
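/* The two macros above insert a single scalar into block row `brow` of the
   diagonal (A) or off-diagonal (B) part.  They locate block column `bcol` in
   a sorted block row by a short bisection followed by a linear scan; a
   stand-alone rendering of that search, with hypothetical inputs (rp = sorted
   block-column indices of the row, nrow = their count), is sketched below. */
#if 0
static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {         /* bisection with the same cutoff as above */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {     /* finish with a linear scan */
    if (rp[i] > bcol) break;
    if (rp[i] == bcol) return i; /* block already present */
  }
  return -(i+1);                 /* absent; encodes the insertion point i */
}
#endif
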
115 /* Only add/insert a(i,j) with i<=j (blocks).
116    Any a(i,j) with i>j supplied by the user is ignored.
117 */
118 #undef __FUNCT__
119 #define __FUNCT__ "MatSetValues_MPISBAIJ"
120 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
121 {
122   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
123   MatScalar      value;
124   PetscBool      roworiented = baij->roworiented;
125   PetscErrorCode ierr;
126   PetscInt       i,j,row,col;
127   PetscInt       rstart_orig=mat->rmap->rstart;
128   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
129   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
130 
131   /* Some variables required by the MatSetValues_SeqSBAIJ_*_Private() macros */
132   Mat          A     = baij->A;
133   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
134   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
135   MatScalar    *aa   =a->a;
136 
137   Mat         B     = baij->B;
138   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
139   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
140   MatScalar   *ba   =b->a;
141 
142   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
143   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
144   MatScalar *ap,*bap;
145 
146   /* for stash */
147   PetscInt  n_loc, *in_loc = NULL;
148   MatScalar *v_loc = NULL;
149 
150   PetscFunctionBegin;
151   if (!baij->donotstash) {
152     if (n > baij->n_loc) {
153       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
154       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
155       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
156       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
157 
158       baij->n_loc = n;
159     }
160     in_loc = baij->in_loc;
161     v_loc  = baij->v_loc;
162   }
163 
164   for (i=0; i<m; i++) {
165     if (im[i] < 0) continue;
166 #if defined(PETSC_USE_DEBUG)
167     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
168 #endif
169     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
170       row = im[i] - rstart_orig;              /* local row index */
171       for (j=0; j<n; j++) {
172         if (im[i]/bs > in[j]/bs) {
173           if (a->ignore_ltriangular) {
174             continue;    /* ignore lower triangular blocks */
175           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
176         }
177         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
178           col  = in[j] - cstart_orig;         /* local col index */
179           brow = row/bs; bcol = col/bs;
180           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
181           if (roworiented) value = v[i*n+j];
182           else             value = v[i+j*m];
183           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
184           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
185         } else if (in[j] < 0) continue;
186 #if defined(PETSC_USE_DEBUG)
187         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
188 #endif
189         else {  /* off-diag entry (B) */
190           if (mat->was_assembled) {
191             if (!baij->colmap) {
192               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
193             }
194 #if defined(PETSC_USE_CTABLE)
195             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
196             col  = col - 1;
197 #else
198             col = baij->colmap[in[j]/bs] - 1;
199 #endif
200             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
201               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
202               col  =  in[j];
203               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
204               B    = baij->B;
205               b    = (Mat_SeqBAIJ*)(B)->data;
206               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
207               ba   = b->a;
208             } else col += in[j]%bs;
209           } else col = in[j];
210           if (roworiented) value = v[i*n+j];
211           else             value = v[i+j*m];
212           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
213           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
214         }
215       }
216     } else {  /* off processor entry */
217       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
218       if (!baij->donotstash) {
219         mat->assembled = PETSC_FALSE;
220         n_loc          = 0;
221         for (j=0; j<n; j++) {
222           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
223           in_loc[n_loc] = in[j];
224           if (roworiented) {
225             v_loc[n_loc] = v[i*n+j];
226           } else {
227             v_loc[n_loc] = v[j*m+i];
228           }
229           n_loc++;
230         }
231         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
232       }
233     }
234   }
235   PetscFunctionReturn(0);
236 }
237 
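/* Caller-side sketch for MatSetValues() on a MATMPISBAIJ matrix (illustrative
   names and indices): only entries whose block row is <= the block column are
   stored; with bs = 1 that means i <= j. */
#if 0
{
  PetscInt    i = 3,j = 7;   /* i <= j, so the entry is kept */
  PetscScalar v = 5.0;

  ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
  /* an entry at (7,3) would be dropped or raise an error, depending on
     MAT_IGNORE_LOWER_TRIANGULAR */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif
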
238 #undef __FUNCT__
239 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
240 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
241 {
242   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
243   const MatScalar *value;
244   MatScalar       *barray     =baij->barray;
245   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
246   PetscErrorCode  ierr;
247   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
248   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval; /* sbaij is square, so the block column range equals the block row range */
249   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
250 
251   PetscFunctionBegin;
252   if (!barray) {
253     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
254     baij->barray = barray;
255   }
256 
257   if (roworiented) {
258     stepval = (n-1)*bs;
259   } else {
260     stepval = (m-1)*bs;
261   }
262   for (i=0; i<m; i++) {
263     if (im[i] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
266 #endif
267     if (im[i] >= rstart && im[i] < rend) {
268       row = im[i] - rstart;
269       for (j=0; j<n; j++) {
270         if (im[i] > in[j]) {
271           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
272           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
273         }
274         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous and no copy is required */
275         if ((roworiented) && (n == 1)) {
276           barray = (MatScalar*) v + i*bs2;
277         } else if ((!roworiented) && (m == 1)) {
278           barray = (MatScalar*) v + j*bs2;
279         } else { /* Here a copy is required */
280           if (roworiented) {
281             value = v + i*(stepval+bs)*bs + j*bs;
282           } else {
283             value = v + j*(stepval+bs)*bs + i*bs;
284           }
285           for (ii=0; ii<bs; ii++,value+=stepval) {
286             for (jj=0; jj<bs; jj++) {
287               *barray++ = *value++;
288             }
289           }
290           barray -=bs2;
291         }
292 
293         if (in[j] >= cstart && in[j] < cend) {
294           col  = in[j] - cstart;
295           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
296         } else if (in[j] < 0) continue;
297 #if defined(PETSC_USE_DEBUG)
298         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
299 #endif
300         else {
301           if (mat->was_assembled) {
302             if (!baij->colmap) {
303               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
304             }
305 
306 #if defined(PETSC_USE_DEBUG)
307 #if defined(PETSC_USE_CTABLE)
308             { PetscInt data;
309               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
310               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
311             }
312 #else
313             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
314 #endif
315 #endif
316 #if defined(PETSC_USE_CTABLE)
317             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
318             col  = (col - 1)/bs;
319 #else
320             col = (baij->colmap[in[j]] - 1)/bs;
321 #endif
322             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
323               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
324               col  = in[j];
325             }
326           } else col = in[j];
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329       }
330     } else {
331       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
332       if (!baij->donotstash) {
333         if (roworiented) {
334           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
335         } else {
336           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
337         }
338       }
339     }
340   }
341   PetscFunctionReturn(0);
342 }
343 
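/* Blocked counterpart, sketched for bs = 2 (illustrative): the caller passes
   whole bs*bs dense blocks addressed by block indices, again with block row
   <= block column. */
#if 0
{
  PetscInt    ib = 0,jb = 2;               /* block row <= block column */
  PetscScalar blk[4] = {1.0,2.0,3.0,4.0};  /* one 2x2 block, row-oriented */

  ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
}
#endif
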
344 #undef __FUNCT__
345 #define __FUNCT__ "MatGetValues_MPISBAIJ"
346 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
347 {
348   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
349   PetscErrorCode ierr;
350   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
351   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
352 
353   PetscFunctionBegin;
354   for (i=0; i<m; i++) {
355     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
356     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
357     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
358       row = idxm[i] - bsrstart;
359       for (j=0; j<n; j++) {
360         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
361         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
362         if (idxn[j] >= bscstart && idxn[j] < bscend) {
363           col  = idxn[j] - bscstart;
364           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
365         } else {
366           if (!baij->colmap) {
367             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
368           }
369 #if defined(PETSC_USE_CTABLE)
370           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
371           data--;
372 #else
373           data = baij->colmap[idxn[j]/bs]-1;
374 #endif
375           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
376           else {
377             col  = data + idxn[j]%bs;
378             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
379           }
380         }
381       }
382     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
383   }
384   PetscFunctionReturn(0);
385 }
386 
387 #undef __FUNCT__
388 #define __FUNCT__ "MatNorm_MPISBAIJ"
389 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
390 {
391   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
392   PetscErrorCode ierr;
393   PetscReal      sum[2],*lnorm2;
394 
395   PetscFunctionBegin;
396   if (baij->size == 1) {
397     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
398   } else {
399     if (type == NORM_FROBENIUS) {
400       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
401       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
402       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
403       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
404       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
405       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
406       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
407       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
408     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
409       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
410       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
411       PetscReal    *rsum,*rsum2,vabs;
412       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
413       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
414       MatScalar    *v;
415 
416       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
417       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
418       /* Amat */
419       v = amat->a; jj = amat->j;
420       for (brow=0; brow<mbs; brow++) {
421         grow = bs*(rstart + brow);
422         nz   = amat->i[brow+1] - amat->i[brow];
423         for (bcol=0; bcol<nz; bcol++) {
424           gcol = bs*(rstart + *jj); jj++;
425           for (col=0; col<bs; col++) {
426             for (row=0; row<bs; row++) {
427               vabs            = PetscAbsScalar(*v); v++;
428               rsum[gcol+col] += vabs;
429               /* off-diagonal block: add the symmetric (transposed) contribution to the row sum */
430               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
431             }
432           }
433         }
434       }
435       /* Bmat */
436       v = bmat->a; jj = bmat->j;
437       for (brow=0; brow<mbs; brow++) {
438         grow = bs*(rstart + brow);
439         nz = bmat->i[brow+1] - bmat->i[brow];
440         for (bcol=0; bcol<nz; bcol++) {
441           gcol = bs*garray[*jj]; jj++;
442           for (col=0; col<bs; col++) {
443             for (row=0; row<bs; row++) {
444               vabs            = PetscAbsScalar(*v); v++;
445               rsum[gcol+col] += vabs;
446               rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
452       *norm = 0.0;
453       for (col=0; col<mat->cmap->N; col++) {
454         if (rsum2[col] > *norm) *norm = rsum2[col];
455       }
456       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
457     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
458   }
459   PetscFunctionReturn(0);
460 }
461 
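/* The Frobenius branch above relies on the symmetric splitting

     ||mat||_F^2 = sum over ranks of ( ||A||_F^2 + 2*||B||_F^2 )

   where A is the local diagonal block (its SeqSBAIJ norm already accounts for
   the entries it stores implicitly by symmetry) and B holds only upper
   off-diagonal blocks, each standing for itself and its transpose -- hence
   *norm = PetscSqrtReal(sum[0] + 2*sum[1]). */
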
462 #undef __FUNCT__
463 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
464 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
465 {
466   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
467   PetscErrorCode ierr;
468   PetscInt       nstash,reallocs;
469   InsertMode     addv;
470 
471   PetscFunctionBegin;
472   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
473 
474   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
475   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
476   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
477   mat->insertmode = addv; /* in case this processor had no cache */
478 
479   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
480   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
481   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
482   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
483   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
484   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
485   PetscFunctionReturn(0);
486 }
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
490 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
491 {
492   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
493   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
494   PetscErrorCode ierr;
495   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
496   PetscInt       *row,*col;
497   PetscBool      other_disassembled;
498   PetscMPIInt    n;
499   PetscBool      r1,r2,r3;
500   MatScalar      *val;
501   InsertMode     addv = mat->insertmode;
502 
503   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
504   PetscFunctionBegin;
505   if (!baij->donotstash &&  !mat->nooffprocentries) {
506     while (1) {
507       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
508       if (!flg) break;
509 
510       for (i=0; i<n;) {
511         /* Now identify the consecutive vals belonging to the same row */
512         for (j=i,rstart=row[j]; j<n; j++) {
513           if (row[j] != rstart) break;
514         }
515         if (j < n) ncols = j-i;
516         else       ncols = n-i;
517         /* Now assemble all these values with a single function call */
518         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
519         i    = j;
520       }
521     }
522     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
523     /* Now process the block-stash. Since the values are stashed column-oriented,
524        clear the row-oriented flags, and after MatSetValuesBlocked() restore
525        the original flags */
526     r1 = baij->roworiented;
527     r2 = a->roworiented;
528     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
529 
530     baij->roworiented = PETSC_FALSE;
531     a->roworiented    = PETSC_FALSE;
532 
533     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
534     while (1) {
535       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
536       if (!flg) break;
537 
538       for (i=0; i<n;) {
539         /* Now identify the consecutive vals belonging to the same row */
540         for (j=i,rstart=row[j]; j<n; j++) {
541           if (row[j] != rstart) break;
542         }
543         if (j < n) ncols = j-i;
544         else       ncols = n-i;
545         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
546         i    = j;
547       }
548     }
549     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
550 
551     baij->roworiented = r1;
552     a->roworiented    = r2;
553 
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
555   }
556 
557   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
558   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
559 
560   /* determine if any processor has disassembled, if so we must
561      also disassemble ourselves, in order that we may reassemble. */
562   /*
563      if nonzero structure of submatrix B cannot change then we know that
564      no processor disassembled thus we can skip this stuff
565   */
566   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
567     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
568     if (mat->was_assembled && !other_disassembled) {
569       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
570     }
571   }
572 
573   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
574     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
575   }
576   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
577   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
578 
579   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
580 
581   baij->rowvalues = 0;
582 
583   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
584   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
585     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
586     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
587   }
588   PetscFunctionReturn(0);
589 }
590 
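/* Typical assembly life cycle exercised by the two routines above
   (caller-side sketch, illustrative): locally owned entries go straight into
   A or B inside MatSetValues(), off-process rows accumulate in mat->stash and
   mat->bstash, MatAssemblyBegin() scatters the stashes, and MatAssemblyEnd()
   applies the received entries before assembling A and B. */
#if 0
ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* any rank may set any (i,j), i <= j */
ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif
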
591 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
592 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
593 #include <petscdraw.h>
594 #undef __FUNCT__
595 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
596 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
597 {
598   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
599   PetscErrorCode    ierr;
600   PetscInt          bs   = mat->rmap->bs;
601   PetscMPIInt       rank = baij->rank;
602   PetscBool         iascii,isdraw;
603   PetscViewer       sviewer;
604   PetscViewerFormat format;
605 
606   PetscFunctionBegin;
607   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
608   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
609   if (iascii) {
610     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
611     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
612       MatInfo info;
613       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
614       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
615       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
616       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
617       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
618       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
619       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
620       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
621       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
623       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
624       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
625       PetscFunctionReturn(0);
626     } else if (format == PETSC_VIEWER_ASCII_INFO) {
627       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
628       PetscFunctionReturn(0);
629     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
630       PetscFunctionReturn(0);
631     }
632   }
633 
634   if (isdraw) {
635     PetscDraw draw;
636     PetscBool isnull;
637     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
638     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
639   }
640 
641   {
642     /* assemble the entire matrix onto first processor. */
643     Mat          A;
644     Mat_SeqSBAIJ *Aloc;
645     Mat_SeqBAIJ  *Bloc;
646     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
647     MatScalar    *a;
648     const char   *matname;
649 
650     /* Should this be the same type as mat? */
651     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
652     if (!rank) {
653       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
654     } else {
655       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
656     }
657     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
658     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
659     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
660     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
661 
662     /* copy over the A part */
663     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
664     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
665     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
666 
667     for (i=0; i<mbs; i++) {
668       rvals[0] = bs*(baij->rstartbs + i);
669       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
670       for (j=ai[i]; j<ai[i+1]; j++) {
671         col = (baij->cstartbs+aj[j])*bs;
672         for (k=0; k<bs; k++) {
673           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
674           col++;
675           a += bs;
676         }
677       }
678     }
679     /* copy over the B part */
680     Bloc = (Mat_SeqBAIJ*)baij->B->data;
681     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
682     for (i=0; i<mbs; i++) {
683 
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = baij->garray[aj[j]]*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     ierr = PetscFree(rvals);CHKERRQ(ierr);
696     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
697     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
698     /*
699        Everyone has to participate in drawing the matrix since the graphics waits are
700        synchronized across all processors that share the PetscDraw object
701     */
702     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
703     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
704     if (!rank) {
705       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
706       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
707     }
708     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
709     ierr = MatDestroy(&A);CHKERRQ(ierr);
710   }
711   PetscFunctionReturn(0);
712 }
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatView_MPISBAIJ"
716 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
717 {
718   PetscErrorCode ierr;
719   PetscBool      iascii,isdraw,issocket,isbinary;
720 
721   PetscFunctionBegin;
722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
723   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
724   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
725   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
726   if (iascii || isdraw || issocket || isbinary) {
727     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
728   }
729   PetscFunctionReturn(0);
730 }
731 
732 #undef __FUNCT__
733 #define __FUNCT__ "MatDestroy_MPISBAIJ"
734 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
735 {
736   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
737   PetscErrorCode ierr;
738 
739   PetscFunctionBegin;
740 #if defined(PETSC_USE_LOG)
741   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
742 #endif
743   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
744   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
745   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
746   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
747 #if defined(PETSC_USE_CTABLE)
748   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
749 #else
750   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
751 #endif
752   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
753   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
754   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
755   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
756   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
757   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
758   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
759   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
760   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
761   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
762   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
763   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
767 #if defined(PETSC_USE_REAL_MAT_SINGLE)
768   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
769 #endif
770   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
771   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
772   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
773   ierr = PetscFree(mat->data);CHKERRQ(ierr);
774 
775   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
778   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
779   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
780   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
781 #if defined(PETSC_HAVE_ELEMENTAL)
782   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
783 #endif
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
789 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
790 {
791   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
792   PetscErrorCode ierr;
793   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
794   PetscScalar    *x,*from;
795 
796   PetscFunctionBegin;
797   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
798   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
799 
800   /* diagonal part */
801   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
802   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
803 
804   /* subdiagonal part */
805   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
806 
807   /* copy x into the vec slvec0 */
808   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
809   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
810 
811   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
812   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
813   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
814 
815   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
816   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
817   /* superdiagonal part */
818   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatMult_MPISBAIJ"
824 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
825 {
826   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
827   PetscErrorCode    ierr;
828   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
829   PetscScalar       *from;
830   const PetscScalar *x;
831 
832   PetscFunctionBegin;
833   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
834   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
835 
836   /* diagonal part */
837   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
838   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
839 
840   /* subdiagonal part */
841   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
842 
843   /* copy x into the vec slvec0 */
844   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
845   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
846 
847   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
848   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
849   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
850 
851   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
852   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
853   /* superdiagonal part */
854   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
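/* Sketch of the single-scatter algorithm shared by the Mult routines above.
   With the local splitting mat = [A | B] (A the symmetric diagonal block, B
   the off-process upper part), y = mat*x is formed as

     slvec1a = A*x                local symmetric multiply
     slvec0b = B^T*x              subdiagonal contribution to be shipped out
     slvec0a = x                  copied so one scatter moves both pieces
     slvec1 += scatter(slvec0)    ADD_VALUES forward scatter
     y       = B*slvec1b + slvec1a

   which needs one scatter instead of the two used by the _2comm variant
   below. */
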
858 #undef __FUNCT__
859 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
860 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
861 {
862   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
863   PetscErrorCode ierr;
864   PetscInt       nt;
865 
866   PetscFunctionBegin;
867   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
868   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
869 
870   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
871   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
872 
873   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
874   /* do diagonal part */
875   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
876   /* do superdiagonal part */
877   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
879   /* do subdiagonal part */
880   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
881   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
882   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
888 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
889 {
890   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
891   PetscErrorCode    ierr;
892   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
893   PetscScalar       *from,zero=0.0;
894   const PetscScalar *x;
895 
896   PetscFunctionBegin;
897   /*
898   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
899   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
900   */
901   /* diagonal part */
902   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
903   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
904 
905   /* subdiagonal part */
906   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
907 
908   /* copy x into the vec slvec0 */
909   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
910   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
911   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
912   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
913 
914   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
915   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
916   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917 
918   /* superdiagonal part */
919   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
925 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
926 {
927   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
928   PetscErrorCode ierr;
929 
930   PetscFunctionBegin;
931   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
932   /* do diagonal part */
933   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
934   /* do superdiagonal part */
935   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
937 
938   /* do subdiagonal part */
939   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
940   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 /*
946   This only works correctly for square matrices where the subblock A->A is the
947    diagonal block
948 */
949 #undef __FUNCT__
950 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
951 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
952 {
953   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
954   PetscErrorCode ierr;
955 
956   PetscFunctionBegin;
957   /* if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
958   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatScale_MPISBAIJ"
964 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
971   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatGetRow_MPISBAIJ"
977 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
978 {
979   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
980   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
981   PetscErrorCode ierr;
982   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
983   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
984   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
985 
986   PetscFunctionBegin;
987   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
988   mat->getrowactive = PETSC_TRUE;
989 
990   if (!mat->rowvalues && (idx || v)) {
991     /*
992         allocate enough space to hold information from the longest row.
993     */
994     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
995     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
996     PetscInt     max = 1,mbs = mat->mbs,tmp;
997     for (i=0; i<mbs; i++) {
998       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
999       if (max < tmp) max = tmp;
1000     }
1001     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1002   }
1003 
1004   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1005   lrow = row - brstart;  /* local row index */
1006 
1007   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1008   if (!v)   {pvA = 0; pvB = 0;}
1009   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1010   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1011   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1012   nztot = nzA + nzB;
1013 
1014   cmap = mat->garray;
1015   if (v  || idx) {
1016     if (nztot) {
1017       /* Sort by increasing column numbers, assuming A and B already sorted */
1018       PetscInt imark = -1;
1019       if (v) {
1020         *v = v_p = mat->rowvalues;
1021         for (i=0; i<nzB; i++) {
1022           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1023           else break;
1024         }
1025         imark = i;
1026         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1027         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1028       }
1029       if (idx) {
1030         *idx = idx_p = mat->rowindices;
1031         if (imark > -1) {
1032           for (i=0; i<imark; i++) {
1033             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1034           }
1035         } else {
1036           for (i=0; i<nzB; i++) {
1037             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1038             else break;
1039           }
1040           imark = i;
1041         }
1042         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1043         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044       }
1045     } else {
1046       if (idx) *idx = 0;
1047       if (v)   *v   = 0;
1048     }
1049   }
1050   *nz  = nztot;
1051   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1052   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1058 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1059 {
1060   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1061 
1062   PetscFunctionBegin;
1063   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1064   baij->getrowactive = PETSC_FALSE;
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1070 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1071 {
1072   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1073   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1074 
1075   PetscFunctionBegin;
1076   aA->getrow_utriangular = PETSC_TRUE;
1077   PetscFunctionReturn(0);
1078 }
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1081 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1082 {
1083   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1084   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1085 
1086   PetscFunctionBegin;
1087   aA->getrow_utriangular = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
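/* Caller-side sketch: on SBAIJ matrices MatGetRow() returns only the stored
   upper triangular part and must be bracketed by the two routines above
   (illustrative; `row` is assumed locally owned): */
#if 0
{
  PetscInt          ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... inspect ncols, cols, vals ... */
  ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
}
#endif
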
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1093 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1100   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1106 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1107 {
1108   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1113   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 /* Check whether isrow is a subset of iscol_local; called by MatGetSubMatrix_MPISBAIJ()
1118    Input: isrow       - distributed (parallel)
1119           iscol_local - locally owned (sequential)
1120 */
1121 #undef __FUNCT__
1122 #define __FUNCT__ "ISEqual_private"
1123 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1124 {
1125   PetscErrorCode ierr;
1126   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1127   const PetscInt *ptr1,*ptr2;
1128 
1129   PetscFunctionBegin;
1130   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1131   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1132   if (sz1 > sz2) {
1133     *flg = PETSC_FALSE;
1134     PetscFunctionReturn(0);
1135   }
1136 
1137   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1138   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1139 
1140   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1141   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1142   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1143   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1144   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1145   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1146 
1147   nmatch=0;
1148   k     = 0;
1149   for (i=0; i<sz1; i++){
1150     for (j=k; j<sz2; j++){
1151       if (a1[i] == a2[j]) {
1152         k = j; nmatch++;
1153         break;
1154       }
1155     }
1156   }
1157   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1158   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1159   ierr = PetscFree(a1);CHKERRQ(ierr);
1160   ierr = PetscFree(a2);CHKERRQ(ierr);
1161   if (nmatch < sz1) {
1162     *flg = PETSC_FALSE;
1163   } else {
1164     *flg = PETSC_TRUE;
1165   }
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1171 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1172 {
1173   PetscErrorCode ierr;
1174   IS             iscol_local;
1175   PetscInt       csize;
1176   PetscBool      isequal;
1177 
1178   PetscFunctionBegin;
1179   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1180   if (call == MAT_REUSE_MATRIX) {
1181     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1182     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1183   } else {
1184     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1185     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1186     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1187   }
1188 
1189   /* now call MatGetSubMatrix_MPIBAIJ() */
1190   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1191   if (call == MAT_INITIAL_MATRIX) {
1192     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1193     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1194   }
1195   PetscFunctionReturn(0);
1196 }
1197 
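/* Usage sketch for the routine above (illustrative sizes nlocal/first): for
   the symmetric format the row and column index sets must select the same
   global indices, which ISEqual_private() above verifies (every index of
   isrow must appear in the gathered iscol). */
#if 0
{
  IS  is;
  Mat sub;

  ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),nlocal,first,1,&is);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(mat,is,is,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
  /* ... later, with the same index sets ... */
  ierr = MatGetSubMatrix(mat,is,is,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
  ierr = MatDestroy(&sub);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
}
#endif
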
1198 #undef __FUNCT__
1199 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1200 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1201 {
1202   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1207   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1213 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1214 {
1215   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1216   Mat            A  = a->A,B = a->B;
1217   PetscErrorCode ierr;
1218   PetscReal      isend[5],irecv[5];
1219 
1220   PetscFunctionBegin;
1221   info->block_size = (PetscReal)matin->rmap->bs;
1222 
1223   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1224 
1225   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1226   isend[3] = info->memory;  isend[4] = info->mallocs;
1227 
1228   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1229 
1230   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1231   isend[3] += info->memory;  isend[4] += info->mallocs;
1232   if (flag == MAT_LOCAL) {
1233     info->nz_used      = isend[0];
1234     info->nz_allocated = isend[1];
1235     info->nz_unneeded  = isend[2];
1236     info->memory       = isend[3];
1237     info->mallocs      = isend[4];
1238   } else if (flag == MAT_GLOBAL_MAX) {
1239     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1240 
1241     info->nz_used      = irecv[0];
1242     info->nz_allocated = irecv[1];
1243     info->nz_unneeded  = irecv[2];
1244     info->memory       = irecv[3];
1245     info->mallocs      = irecv[4];
1246   } else if (flag == MAT_GLOBAL_SUM) {
1247     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1248 
1249     info->nz_used      = irecv[0];
1250     info->nz_allocated = irecv[1];
1251     info->nz_unneeded  = irecv[2];
1252     info->memory       = irecv[3];
1253     info->mallocs      = irecv[4];
1254   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1255   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1256   info->fill_ratio_needed = 0;
1257   info->factor_mallocs    = 0;
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1263 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1264 {
1265   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1266   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   switch (op) {
1271   case MAT_NEW_NONZERO_LOCATIONS:
1272   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1273   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1274   case MAT_KEEP_NONZERO_PATTERN:
1275   case MAT_NEW_NONZERO_LOCATION_ERR:
1276     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1277     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1278     break;
1279   case MAT_ROW_ORIENTED:
1280     a->roworiented = flg;
1281 
1282     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1283     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1284     break;
1285   case MAT_NEW_DIAGONALS:
1286     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1287     break;
1288   case MAT_IGNORE_OFF_PROC_ENTRIES:
1289     a->donotstash = flg;
1290     break;
1291   case MAT_USE_HASH_TABLE:
1292     a->ht_flag = flg;
1293     break;
1294   case MAT_HERMITIAN:
1295     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1296     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1297 
1298     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1299     break;
1300   case MAT_SPD:
1301     A->spd_set = PETSC_TRUE;
1302     A->spd     = flg;
1303     if (flg) {
1304       A->symmetric                  = PETSC_TRUE;
1305       A->structurally_symmetric     = PETSC_TRUE;
1306       A->symmetric_set              = PETSC_TRUE;
1307       A->structurally_symmetric_set = PETSC_TRUE;
1308     }
1309     break;
1310   case MAT_SYMMETRIC:
1311     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1312     break;
1313   case MAT_STRUCTURALLY_SYMMETRIC:
1314     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1315     break;
1316   case MAT_SYMMETRY_ETERNAL:
1317     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1318     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1319     break;
1320   case MAT_IGNORE_LOWER_TRIANGULAR:
1321     aA->ignore_ltriangular = flg;
1322     break;
1323   case MAT_ERROR_LOWER_TRIANGULAR:
1324     aA->ignore_ltriangular = flg;
1325     break;
1326   case MAT_GETROW_UPPERTRIANGULAR:
1327     aA->getrow_utriangular = flg;
1328     break;
1329   default:
1330     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1331   }
1332   PetscFunctionReturn(0);
1333 }
1334 
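/* Option sketch: the SBAIJ-specific settings handled above are most often
   used to drop user-supplied lower triangular entries silently instead of
   erroring (illustrative): */
#if 0
ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
#endif
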
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1337 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1338 {
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1343     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1344   }
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1350 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1351 {
1352   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1353   Mat            a     = baij->A, b=baij->B;
1354   PetscErrorCode ierr;
1355   PetscInt       nv,m,n;
1356   PetscBool      flg;
1357 
1358   PetscFunctionBegin;
1359   if (ll != rr) {
1360     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1361     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1362   }
1363   if (!ll) PetscFunctionReturn(0);
1364 
1365   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1366   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1367 
1368   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1369   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1370 
1371   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1372 
1373   /* left diagonalscale the off-diagonal part */
1374   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1375 
1376   /* scale the diagonal part */
1377   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1378 
1379   /* right diagonalscale the off-diagonal part */
1380   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1381   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
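
/* Usage sketch (illustrative): symmetric scaling D*A*D with a single vector,
   since the symmetric format requires identical left and right scalings:
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/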
1384 
1385 #undef __FUNCT__
1386 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1387 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1388 {
1389   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatEqual_MPISBAIJ"
1401 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1402 {
1403   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1404   Mat            a,b,c,d;
1405   PetscBool      flg;
1406   PetscErrorCode ierr;
1407 
1408   PetscFunctionBegin;
1409   a = matA->A; b = matA->B;
1410   c = matB->A; d = matB->B;
1411 
1412   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1413   if (flg) {
1414     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1415   }
1416   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatCopy_MPISBAIJ"
1422 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1423 {
1424   PetscErrorCode ierr;
1425   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1426   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1427 
1428   PetscFunctionBegin;
1429   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1430   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1431     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1432     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1433     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1434   } else {
1435     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1436     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1437   }
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1443 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1444 {
1445   PetscErrorCode ierr;
1446 
1447   PetscFunctionBegin;
1448   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1454 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1455 {
1456   PetscErrorCode ierr;
1457   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1458   PetscBLASInt   bnz,one=1;
1459   Mat_SeqSBAIJ   *xa,*ya;
1460   Mat_SeqBAIJ    *xb,*yb;
1461 
1462   PetscFunctionBegin;
1463   if (str == SAME_NONZERO_PATTERN) {
1464     PetscScalar alpha = a;
1465     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1466     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1467     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1468     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1469     xb   = (Mat_SeqBAIJ*)xx->B->data;
1470     yb   = (Mat_SeqBAIJ*)yy->B->data;
1471     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1472     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1473     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1474   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X are a subset of Y's */
1475     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1476     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1477     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1478   } else {
1479     Mat      B;
1480     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1481     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1482     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1483     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1484     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1485     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1486     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1487     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1488     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1489     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1490     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1491     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1492     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1493     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1494     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1495     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1496     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1497     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1498     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1499     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
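
/* Usage sketch (illustrative): Y <- Y + 2*X takes the fast BLAS axpy path above
   when both matrices were assembled with identical nonzero patterns:
     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/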
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1506 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1507 {
1508   PetscErrorCode ierr;
1509   PetscInt       i;
1510   PetscBool      flg;
1511 
1512   PetscFunctionBegin;
1513   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1514   for (i=0; i<n; i++) {
1515     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1516     if (!flg) { /* (*B)[i] is non-symmetric, set the flag */
1517       ierr = MatSetOption((*B)[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1518     }
1519   }
1520   PetscFunctionReturn(0);
1521 }
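
/* Usage sketch (illustrative): extracting with the same index set for rows and
   columns keeps the submatrix symmetric, so its MAT_SYMMETRIC flag is retained;
   here submats is a Mat* filled by the call:
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submats);CHKERRQ(ierr);
*/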
1522 
1523 /* -------------------------------------------------------------------*/
1524 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1525                                        MatGetRow_MPISBAIJ,
1526                                        MatRestoreRow_MPISBAIJ,
1527                                        MatMult_MPISBAIJ,
1528                                /*  4*/ MatMultAdd_MPISBAIJ,
1529                                        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones (matrix is symmetric) */
1530                                        MatMultAdd_MPISBAIJ,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                /* 10*/ 0,
1535                                        0,
1536                                        0,
1537                                        MatSOR_MPISBAIJ,
1538                                        MatTranspose_MPISBAIJ,
1539                                /* 15*/ MatGetInfo_MPISBAIJ,
1540                                        MatEqual_MPISBAIJ,
1541                                        MatGetDiagonal_MPISBAIJ,
1542                                        MatDiagonalScale_MPISBAIJ,
1543                                        MatNorm_MPISBAIJ,
1544                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1545                                        MatAssemblyEnd_MPISBAIJ,
1546                                        MatSetOption_MPISBAIJ,
1547                                        MatZeroEntries_MPISBAIJ,
1548                                /* 24*/ 0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                /* 29*/ MatSetUp_MPISBAIJ,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                /* 34*/ MatDuplicate_MPISBAIJ,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                /* 39*/ MatAXPY_MPISBAIJ,
1564                                        MatGetSubMatrices_MPISBAIJ,
1565                                        MatIncreaseOverlap_MPISBAIJ,
1566                                        MatGetValues_MPISBAIJ,
1567                                        MatCopy_MPISBAIJ,
1568                                /* 44*/ 0,
1569                                        MatScale_MPISBAIJ,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                /* 49*/ 0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                /* 54*/ 0,
1579                                        0,
1580                                        MatSetUnfactored_MPISBAIJ,
1581                                        0,
1582                                        MatSetValuesBlocked_MPISBAIJ,
1583                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                        0,
1588                                /* 64*/ 0,
1589                                        0,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                        0,
1598                                /* 74*/ 0,
1599                                        0,
1600                                        0,
1601                                        0,
1602                                        0,
1603                                /* 79*/ 0,
1604                                        0,
1605                                        0,
1606                                        0,
1607                                        MatLoad_MPISBAIJ,
1608                                /* 84*/ 0,
1609                                        0,
1610                                        0,
1611                                        0,
1612                                        0,
1613                                /* 89*/ 0,
1614                                        0,
1615                                        0,
1616                                        0,
1617                                        0,
1618                                /* 94*/ 0,
1619                                        0,
1620                                        0,
1621                                        0,
1622                                        0,
1623                                /* 99*/ 0,
1624                                        0,
1625                                        0,
1626                                        0,
1627                                        0,
1628                                /*104*/ 0,
1629                                        MatRealPart_MPISBAIJ,
1630                                        MatImaginaryPart_MPISBAIJ,
1631                                        MatGetRowUpperTriangular_MPISBAIJ,
1632                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1633                                /*109*/ 0,
1634                                        0,
1635                                        0,
1636                                        0,
1637                                        0,
1638                                /*114*/ 0,
1639                                        0,
1640                                        0,
1641                                        0,
1642                                        0,
1643                                /*119*/ 0,
1644                                        0,
1645                                        0,
1646                                        0,
1647                                        0,
1648                                /*124*/ 0,
1649                                        0,
1650                                        0,
1651                                        0,
1652                                        0,
1653                                /*129*/ 0,
1654                                        0,
1655                                        0,
1656                                        0,
1657                                        0,
1658                                /*134*/ 0,
1659                                        0,
1660                                        0,
1661                                        0,
1662                                        0,
1663                                /*139*/ 0,
1664                                        0,
1665                                        0,
1666                                        0,
1667                                        0,
1668                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1669 };
1670 
1671 #undef __FUNCT__
1672 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1673 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1674 {
1675   PetscFunctionBegin;
1676   *a = ((Mat_MPISBAIJ*)A->data)->A;
1677   PetscFunctionReturn(0);
1678 }
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1682 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1683 {
1684   Mat_MPISBAIJ   *b;
1685   PetscErrorCode ierr;
1686   PetscInt       i,mbs,Mbs;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1690   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1691   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1692   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1693 
1694   b   = (Mat_MPISBAIJ*)B->data;
1695   mbs = B->rmap->n/bs;
1696   Mbs = B->rmap->N/bs;
1697   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1698 
1699   B->rmap->bs = bs;
1700   b->bs2      = bs*bs;
1701   b->mbs      = mbs;
1702   b->Mbs      = Mbs;
1703   b->nbs      = B->cmap->n/bs;
1704   b->Nbs      = B->cmap->N/bs;
1705 
1706   for (i=0; i<=b->size; i++) {
1707     b->rangebs[i] = B->rmap->range[i]/bs;
1708   }
1709   b->rstartbs = B->rmap->rstart/bs;
1710   b->rendbs   = B->rmap->rend/bs;
1711 
1712   b->cstartbs = B->cmap->rstart/bs;
1713   b->cendbs   = B->cmap->rend/bs;
1714 
1715   if (!B->preallocated) {
1716     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1717     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1718     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1719     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1720     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1721     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1722     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1723     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1724     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1725   }
1726 
1727   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1728   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1729 
1730   B->preallocated = PETSC_TRUE;
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1736 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1737 {
1738   PetscInt       m,rstart,cstart,cend;
1739   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1740   const PetscInt *JJ    =0;
1741   PetscScalar    *values=0;
1742   PetscErrorCode ierr;
1743 
1744   PetscFunctionBegin;
1745   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1746   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1747   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1748   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1749   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1750   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1751   m      = B->rmap->n/bs;
1752   rstart = B->rmap->rstart/bs;
1753   cstart = B->cmap->rstart/bs;
1754   cend   = B->cmap->rend/bs;
1755 
1756   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1757   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1758   for (i=0; i<m; i++) {
1759     nz = ii[i+1] - ii[i];
1760     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1761     nz_max = PetscMax(nz_max,nz);
1762     JJ     = jj + ii[i];
1763     for (j=0; j<nz; j++) {
1764       if (*JJ >= cstart) break;
1765       JJ++;
1766     }
1767     d = 0;
1768     for (; j<nz; j++) {
1769       if (*JJ++ >= cend) break;
1770       d++;
1771     }
1772     d_nnz[i] = d;
1773     o_nnz[i] = nz - d;
1774   }
1775   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1776   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1777 
1778   values = (PetscScalar*)V;
1779   if (!values) {
1780     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1781     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1782   }
1783   for (i=0; i<m; i++) {
1784     PetscInt          row    = i + rstart;
1785     PetscInt          ncols  = ii[i+1] - ii[i];
1786     const PetscInt    *icols = jj + ii[i];
1787     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1788     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1789   }
1790 
1791   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1792   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1795   PetscFunctionReturn(0);
1796 }
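
/* Usage sketch (hypothetical data): preallocate and fill from block-CSR arrays
   that describe the upper triangle of each local block row; here one local
   block row with nonzero block columns 0 and 2 and no values (V = NULL):
     PetscInt ii[] = {0,2};
     PetscInt jj[] = {0,2};
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,NULL);CHKERRQ(ierr);
*/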
1797 
1798 /*MC
1799    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1800    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1801    the matrix is stored.
1802 
1803   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1804   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE);
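
  Example (a minimal sketch; comm, m, M, bs, d_nz and o_nz are illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
   /* set only upper triangular (block) entries, then assemble */
.ve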
1805 
1806    Options Database Keys:
1807 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1808 
1809   Level: beginner
1810 
1811 .seealso: MatCreateMPISBAIJ
1812 M*/
1813 
1814 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatCreate_MPISBAIJ"
1818 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1819 {
1820   Mat_MPISBAIJ   *b;
1821   PetscErrorCode ierr;
1822   PetscBool      flg = PETSC_FALSE;
1823 
1824   PetscFunctionBegin;
1825   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1826   B->data = (void*)b;
1827   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1828 
1829   B->ops->destroy = MatDestroy_MPISBAIJ;
1830   B->ops->view    = MatView_MPISBAIJ;
1831   B->assembled    = PETSC_FALSE;
1832   B->insertmode   = NOT_SET_VALUES;
1833 
1834   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1835   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1836 
1837   /* build local table of row and column ownerships */
1838   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1839 
1840   /* build cache for off array entries formed */
1841   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1842 
1843   b->donotstash  = PETSC_FALSE;
1844   b->colmap      = NULL;
1845   b->garray      = NULL;
1846   b->roworiented = PETSC_TRUE;
1847 
1848   /* stuff used in block assembly */
1849   b->barray = 0;
1850 
1851   /* stuff used for matrix vector multiply */
1852   b->lvec    = 0;
1853   b->Mvctx   = 0;
1854   b->slvec0  = 0;
1855   b->slvec0b = 0;
1856   b->slvec1  = 0;
1857   b->slvec1a = 0;
1858   b->slvec1b = 0;
1859   b->sMvctx  = 0;
1860 
1861   /* stuff for MatGetRow() */
1862   b->rowindices   = 0;
1863   b->rowvalues    = 0;
1864   b->getrowactive = PETSC_FALSE;
1865 
1866   /* hash table stuff */
1867   b->ht           = 0;
1868   b->hd           = 0;
1869   b->ht_size      = 0;
1870   b->ht_flag      = PETSC_FALSE;
1871   b->ht_fact      = 0;
1872   b->ht_total_ct  = 0;
1873   b->ht_insert_ct = 0;
1874 
1875   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1876   b->ijonly = PETSC_FALSE;
1877 
1878   b->in_loc = 0;
1879   b->v_loc  = 0;
1880   b->n_loc  = 0;
1881 
1882   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1888 #if defined(PETSC_HAVE_ELEMENTAL)
1889   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1890 #endif
1891 
1892   B->symmetric                  = PETSC_TRUE;
1893   B->structurally_symmetric     = PETSC_TRUE;
1894   B->symmetric_set              = PETSC_TRUE;
1895   B->structurally_symmetric_set = PETSC_TRUE;
1896 
1897   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1898   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1899   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1900   if (flg) {
1901     PetscReal fact = 1.39;
1902     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1903     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1904     if (fact <= 1.0) fact = 1.39;
1905     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1906     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1907   }
1908   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 /*MC
1913    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1914 
1915    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1916    and MATMPISBAIJ otherwise.
1917 
1918    Options Database Keys:
1919 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1920 
1921   Level: beginner
1922 
1923 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1924 M*/
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1928 /*@C
1929    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1930    the user should preallocate the matrix storage by setting the parameters
1931    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1932    performance can be increased by more than a factor of 50.
1933 
1934    Collective on Mat
1935 
1936    Input Parameters:
1937 +  B - the matrix
1938 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1939           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1940 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1941            submatrix  (same for all local rows)
1942 .  d_nnz - array containing the number of block nonzeros in the various block rows
1943            in the upper triangular and diagonal part of the diagonal portion of the local
1944            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1945            for the diagonal entry and set a value even if it is zero.
1946 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1947            submatrix (same for all local rows).
1948 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1949            off-diagonal portion of the local submatrix that is right of the diagonal
1950            (possibly different for each block row) or NULL.
1951 
1952 
1953    Options Database Keys:
1954 +   -mat_no_unroll - uses code that does not unroll the loops in the
1955                      block calculations (much slower)
1956 -   -mat_block_size - size of the blocks to use
1957 
1958    Notes:
1959 
1960    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1961    then it must be used on all processors that share the object for that argument.
1962 
1963    If the *_nnz parameter is given then the *_nz parameter is ignored
1964 
1965    Storage Information:
1966    For a square global matrix we define each processor's diagonal portion
1967    to be its local rows and the corresponding columns (a square submatrix);
1968    each processor's off-diagonal portion encompasses the remainder of the
1969    local matrix (a rectangular submatrix).
1970 
1971    The user can specify preallocated storage for the diagonal part of
1972    the local submatrix with either d_nz or d_nnz (not both).  Set
1973    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1974    memory allocation.  Likewise, specify preallocated storage for the
1975    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1976 
1977    You can call MatGetInfo() to get information on how effective the preallocation was;
1978    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1979    You can also run with the option -info and look for messages with the string
1980    malloc in them to see if additional memory allocation was needed.
1981 
1982    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1983    the figure below we depict these three local rows and all columns (0-11).
1984 
1985 .vb
1986            0 1 2 3 4 5 6 7 8 9 10 11
1987           --------------------------
1988    row 3  |. . . d d d o o o o  o  o
1989    row 4  |. . . d d d o o o o  o  o
1990    row 5  |. . . d d d o o o o  o  o
1991           --------------------------
1992 .ve
1993 
1994    Thus, any entries in the d locations are stored in the d (diagonal)
1995    submatrix, and any entries in the o locations are stored in the
1996    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1997    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1998 
1999    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2000    plus the diagonal part of the d matrix,
2001    and o_nz should indicate the number of block nonzeros per row in the o matrix.
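
   For the layout depicted above with bs=1, assuming every d and o location shown
   is nonzero, a matching preallocation (a sketch, with values read off the figure) is:
.vb
   PetscInt d_nnz[] = {3,2,1};  /* upper triangle of d: cols 3-5, 4-5, 5 */
   PetscInt o_nnz[] = {6,6,6};  /* cols 6-11 in every local row */
   MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
.ve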
2002 
2003    In general, for PDE problems in which most nonzeros are near the diagonal,
2004    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2005    or you will get TERRIBLE performance; see the users' manual chapter on
2006    matrices.
2007 
2008    Level: intermediate
2009 
2010 .keywords: matrix, block, aij, compressed row, sparse, parallel
2011 
2012 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2013 @*/
2014 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2015 {
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2020   PetscValidType(B,1);
2021   PetscValidLogicalCollectiveInt(B,bs,2);
2022   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatCreateSBAIJ"
2028 /*@C
2029    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2030    (block compressed row).  For good matrix assembly performance
2031    the user should preallocate the matrix storage by setting the parameters
2032    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2033    performance can be increased by more than a factor of 50.
2034 
2035    Collective on MPI_Comm
2036 
2037    Input Parameters:
2038 +  comm - MPI communicator
2039 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2040           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2041 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2042            This value should be the same as the local size used in creating the
2043            y vector for the matrix-vector product y = Ax.
2044 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2045            This value should be the same as the local size used in creating the
2046            x vector for the matrix-vector product y = Ax.
2047 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2048 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2049 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2050            submatrix  (same for all local rows)
2051 .  d_nnz - array containing the number of block nonzeros in the various block rows
2052            in the upper triangular portion of the in diagonal portion of the local
2053            (possibly different for each block block row) or NULL.
2054            If you plan to factor the matrix you must leave room for the diagonal entry and
2055            set its value even if it is zero.
2056 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2057            submatrix (same for all local rows).
2058 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2059            off-diagonal portion of the local submatrix (possibly different for
2060            each block row) or NULL.
2061 
2062    Output Parameter:
2063 .  A - the matrix
2064 
2065    Options Database Keys:
2066 +   -mat_no_unroll - uses code that does not unroll the loops in the
2067                      block calculations (much slower)
2068 .   -mat_block_size - size of the blocks to use
2069 -   -mat_mpi - use the parallel matrix data structures even on one processor
2070                (defaults to using SeqBAIJ format on one processor)
2071 
2072    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2073    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2074    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2075 
2076    Notes:
2077    The number of rows and columns must be divisible by blocksize.
2078    This matrix type does not support complex Hermitian operations.
2079 
2080    The user MUST specify either the local or global matrix dimensions
2081    (possibly both).
2082 
2083    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2084    then it must be used on all processors that share the object for that argument.
2085 
2086    If the *_nnz parameter is given then the *_nz parameter is ignored
2087 
2088    Storage Information:
2089    For a square global matrix we define each processor's diagonal portion
2090    to be its local rows and the corresponding columns (a square submatrix);
2091    each processor's off-diagonal portion encompasses the remainder of the
2092    local matrix (a rectangular submatrix).
2093 
2094    The user can specify preallocated storage for the diagonal part of
2095    the local submatrix with either d_nz or d_nnz (not both).  Set
2096    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2097    memory allocation.  Likewise, specify preallocated storage for the
2098    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2099 
2100    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2101    the figure below we depict these three local rows and all columns (0-11).
2102 
2103 .vb
2104            0 1 2 3 4 5 6 7 8 9 10 11
2105           --------------------------
2106    row 3  |. . . d d d o o o o  o  o
2107    row 4  |. . . d d d o o o o  o  o
2108    row 5  |. . . d d d o o o o  o  o
2109           --------------------------
2110 .ve
2111 
2112    Thus, any entries in the d locations are stored in the d (diagonal)
2113    submatrix, and any entries in the o locations are stored in the
2114    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2115    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2116 
2117    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2118    plus the diagonal part of the d matrix,
2119    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2120    In general, for PDE problems in which most nonzeros are near the diagonal,
2121    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2122    or you will get TERRIBLE performance; see the users' manual chapter on
2123    matrices.
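
   A call matching the layout above (a sketch; the arguments are illustrative) is:
.vb
   MatCreateSBAIJ(comm,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,6,NULL,&A);
.ve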
2124 
2125    Level: intermediate
2126 
2127 .keywords: matrix, block, aij, compressed row, sparse, parallel
2128 
2129 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2130 @*/
2131 
2132 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2133 {
2134   PetscErrorCode ierr;
2135   PetscMPIInt    size;
2136 
2137   PetscFunctionBegin;
2138   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2139   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2140   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2141   if (size > 1) {
2142     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2143     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2144   } else {
2145     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2146     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2147   }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2154 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2155 {
2156   Mat            mat;
2157   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2158   PetscErrorCode ierr;
2159   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2160   PetscScalar    *array;
2161 
2162   PetscFunctionBegin;
2163   *newmat = 0;
2164 
2165   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2166   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2167   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2168   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2169   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2170   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2171 
2172   mat->factortype   = matin->factortype;
2173   mat->preallocated = PETSC_TRUE;
2174   mat->assembled    = PETSC_TRUE;
2175   mat->insertmode   = NOT_SET_VALUES;
2176 
2177   a      = (Mat_MPISBAIJ*)mat->data;
2178   a->bs2 = oldmat->bs2;
2179   a->mbs = oldmat->mbs;
2180   a->nbs = oldmat->nbs;
2181   a->Mbs = oldmat->Mbs;
2182   a->Nbs = oldmat->Nbs;
2183 
2184 
2185   a->size         = oldmat->size;
2186   a->rank         = oldmat->rank;
2187   a->donotstash   = oldmat->donotstash;
2188   a->roworiented  = oldmat->roworiented;
2189   a->rowindices   = 0;
2190   a->rowvalues    = 0;
2191   a->getrowactive = PETSC_FALSE;
2192   a->barray       = 0;
2193   a->rstartbs     = oldmat->rstartbs;
2194   a->rendbs       = oldmat->rendbs;
2195   a->cstartbs     = oldmat->cstartbs;
2196   a->cendbs       = oldmat->cendbs;
2197 
2198   /* hash table stuff */
2199   a->ht           = 0;
2200   a->hd           = 0;
2201   a->ht_size      = 0;
2202   a->ht_flag      = oldmat->ht_flag;
2203   a->ht_fact      = oldmat->ht_fact;
2204   a->ht_total_ct  = 0;
2205   a->ht_insert_ct = 0;
2206 
2207   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2208   if (oldmat->colmap) {
2209 #if defined(PETSC_USE_CTABLE)
2210     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2211 #else
2212     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2213     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2214     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2215 #endif
2216   } else a->colmap = 0;
2217 
2218   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2219     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2220     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2221     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2222   } else a->garray = 0;
2223 
2224   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2225   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2226   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2227   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2228   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2229 
2230   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2231   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2232   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2233   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2234 
2235   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2236   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2237   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2238   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2239   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2240   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2241   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2242   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2243   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2244   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2245   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2246   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2247   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2248 
2249   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2250   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2251   a->sMvctx = oldmat->sMvctx;
2252   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2253 
2254   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2255   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2256   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2257   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2258   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2259   *newmat = mat;
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatLoad_MPISBAIJ"
2265 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2266 {
2267   PetscErrorCode ierr;
2268   PetscInt       i,nz,j,rstart,rend;
2269   PetscScalar    *vals,*buf;
2270   MPI_Comm       comm;
2271   MPI_Status     status;
2272   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2273   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2274   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2275   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2276   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2277   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2278   int            fd;
2279 
2280   PetscFunctionBegin;
2281   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2282   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2283   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2284   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2285   if (bs < 0) bs = 1;
2286 
2287   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2288   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2289   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2290   if (!rank) {
2291     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2292     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2293     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2294   }
2295 
2296   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2297 
2298   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2299   M    = header[1];
2300   N    = header[2];
2301 
2302   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2303   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2304   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2305 
2306   /* If global sizes are set, check if they are consistent with that given in the file */
2307   if (sizesset) {
2308     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2309   }
2310   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2311   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2312 
2313   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2314 
2315   /*
2316      This code adds extra rows to make sure the number of rows is
2317      divisible by the blocksize
2318   */
2319   Mbs        = M/bs;
2320   extra_rows = bs - M + bs*(Mbs);
2321   if (extra_rows == bs) extra_rows = 0;
2322   else                  Mbs++;
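  /* e.g. M=10, bs=3: Mbs=3, extra_rows = 3-10+9 = 2, so Mbs becomes 4 and two
     padding rows (with unit diagonal values) are appended at the bottom */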
2323   if (extra_rows && !rank) {
2324     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2325   }
2326 
2327   /* determine ownership of all rows */
2328   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2329     mbs = Mbs/size + ((Mbs % size) > rank);
2330     m   = mbs*bs;
2331   } else { /* User Set */
2332     m   = newmat->rmap->n;
2333     mbs = m/bs;
2334   }
2335   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2336   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2337   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2338   rowners[0] = 0;
2339   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2340   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2341   rstart = rowners[rank];
2342   rend   = rowners[rank+1];
2343 
2344   /* distribute row lengths to all processors */
2345   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2346   if (!rank) {
2347     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2348     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2349     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2350     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2351     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2352     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2353     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2354   } else {
2355     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2356   }
2357 
2358   if (!rank) {   /* procs[0] */
2359     /* calculate the number of nonzeros on each processor */
2360     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2361     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2362     for (i=0; i<size; i++) {
2363       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2364         procsnz[i] += rowlengths[j];
2365       }
2366     }
2367     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2368 
2369     /* determine max buffer needed and allocate it */
2370     maxnz = 0;
2371     for (i=0; i<size; i++) {
2372       maxnz = PetscMax(maxnz,procsnz[i]);
2373     }
2374     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2375 
2376     /* read in my part of the matrix column indices  */
2377     nz     = procsnz[0];
2378     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2379     mycols = ibuf;
2380     if (size == 1) nz -= extra_rows;
2381     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2382     if (size == 1) {
2383       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2384     }
2385 
2386     /* read in everyone else's rows (except the last process's) and ship them off */
2387     for (i=1; i<size-1; i++) {
2388       nz   = procsnz[i];
2389       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2390       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2391     }
2392     /* read in the stuff for the last proc */
2393     if (size != 1) {
2394       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2395       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2396       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2397       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2398     }
2399     ierr = PetscFree(cols);CHKERRQ(ierr);
2400   } else {  /* procs[i], i>0 */
2401     /* determine buffer space needed for message */
2402     nz = 0;
2403     for (i=0; i<m; i++) nz += locrowlens[i];
2404     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2405     mycols = ibuf;
2406     /* receive message of column indices */
2407     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2408     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2409     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2410   }
2411 
2412   /* loop over local rows, determining number of off diagonal entries */
2413   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2414   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2415   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2416   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2417   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2418   rowcount = 0;
2419   nzcount  = 0;
2420   for (i=0; i<mbs; i++) {
2421     dcount  = 0;
2422     odcount = 0;
2423     for (j=0; j<bs; j++) {
2424       kmax = locrowlens[rowcount];
2425       for (k=0; k<kmax; k++) {
2426         tmp = mycols[nzcount++]/bs; /* block col. index */
2427         if (!mask[tmp]) {
2428           mask[tmp] = 1;
2429           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2430           else masked1[dcount++] = tmp; /* entry in diag portion */
2431         }
2432       }
2433       rowcount++;
2434     }
2435 
2436     dlens[i]  = dcount;  /* d_nzz[i] */
2437     odlens[i] = odcount; /* o_nzz[i] */
2438 
2439     /* zero out the mask elements we set */
2440     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2441     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2442   }
2443   if (!sizesset) {
2444     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2445   }
2446   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2447   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2448 
2449   if (!rank) {
2450     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2451     /* read in my part of the matrix numerical values  */
2452     nz     = procsnz[0];
2453     vals   = buf;
2454     mycols = ibuf;
2455     if (size == 1) nz -= extra_rows;
2456     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2457     if (size == 1) {
2458       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2459     }
2460 
2461     /* insert into matrix */
2462     jj = rstart*bs;
2463     for (i=0; i<m; i++) {
2464       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2465       mycols += locrowlens[i];
2466       vals   += locrowlens[i];
2467       jj++;
2468     }
2469 
2470     /* read in other processors (except the last one) and ship out */
2471     for (i=1; i<size-1; i++) {
2472       nz   = procsnz[i];
2473       vals = buf;
2474       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2475       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2476     }
2477     /* the last proc */
2478     if (size != 1) {
2479       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2480       vals = buf;
2481       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2482       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2483       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2484     }
2485     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2486 
2487   } else {
2488     /* receive numeric values */
2489     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2490 
2491     /* receive message of values */
2492     vals   = buf;
2493     mycols = ibuf;
2494     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2495     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2496     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2497 
2498     /* insert into matrix */
2499     jj = rstart*bs;
2500     for (i=0; i<m; i++) {
2501       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2502       mycols += locrowlens[i];
2503       vals   += locrowlens[i];
2504       jj++;
2505     }
2506   }
2507 
2508   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2509   ierr = PetscFree(buf);CHKERRQ(ierr);
2510   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2511   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2512   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2513   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2514   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2515   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
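
/* Usage sketch (illustrative): loading an SBAIJ matrix from a PETSc binary file:
     PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);
     MatCreate(comm,&A);
     MatSetType(A,MATMPISBAIJ);
     MatLoad(A,viewer);
     PetscViewerDestroy(&viewer);
*/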
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2521 /*XXXXX@
2522    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2523 
2524    Input Parameters:
2525 .  mat  - the matrix
2526 .  fact - factor
2527 
2528    Not Collective on Mat, each process can have a different hash factor
2529 
2530    Level: advanced
2531 
2532   Notes:
2533    This can also be set by the command line option: -mat_use_hash_table fact
2534 
2535 .keywords: matrix, hashtable, factor, HT
2536 
2537 .seealso: MatSetOption()
2538 @XXXXX*/
2539 
2540 
2541 #undef __FUNCT__
2542 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2543 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2544 {
2545   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2546   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2547   PetscReal      atmp;
2548   PetscReal      *work,*svalues,*rvalues;
2549   PetscErrorCode ierr;
2550   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2551   PetscMPIInt    rank,size;
2552   PetscInt       *rowners_bs,dest,count,source;
2553   PetscScalar    *va;
2554   MatScalar      *ba;
2555   MPI_Status     stat;
2556 
2557   PetscFunctionBegin;
2558   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2559   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2560   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2561 
2562   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2563   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2564 
2565   bs  = A->rmap->bs;
2566   mbs = a->mbs;
2567   Mbs = a->Mbs;
2568   ba  = b->a;
2569   bi  = b->i;
2570   bj  = b->j;
2571 
2572   /* find ownerships */
2573   rowners_bs = A->rmap->range;
2574 
2575   /* each proc creates an array to be distributed */
2576   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2577   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
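  /* work[] accumulates, for every global row, the largest magnitude contributed by this
     rank's off-diagonal block B; since only the upper triangle is stored, an entry (i,j)
     of B also represents (j,i), so these maxima are sent below to the owners of rows j */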
2578 
2579   /* row_max for B */
2580   if (rank != size-1) {
2581     for (i=0; i<mbs; i++) {
2582       ncols = bi[1] - bi[0]; bi++;
2583       brow  = bs*i;
2584       for (j=0; j<ncols; j++) {
2585         bcol = bs*(*bj);
2586         for (kcol=0; kcol<bs; kcol++) {
2587           col  = bcol + kcol;                /* local col index */
2588           col += rowners_bs[rank+1];      /* global col index */
2589           for (krow=0; krow<bs; krow++) {
2590             atmp = PetscAbsScalar(*ba); ba++;
2591             row  = brow + krow;   /* local row index */
2592             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2593             if (work[col] < atmp) work[col] = atmp;
2594           }
2595         }
2596         bj++;
2597       }
2598     }
2599 
2600     /* send each trailing segment of work[] to the rank that owns those rows */
2601     for (dest=rank+1; dest<size; dest++) {
2602       svalues = work + rowners_bs[dest];
2603       count   = rowners_bs[dest+1]-rowners_bs[dest];
2604       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2605     }
2606   }
2607 
2608   /* receive values */
2609   if (rank) {
2610     rvalues = work;
2611     count   = rowners_bs[rank+1]-rowners_bs[rank];
2612     for (source=0; source<rank; source++) {
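      /* one message arrives from each lower rank; since only maxima are taken, the
         arrival order (hence MPI_ANY_SOURCE/MPI_ANY_TAG) does not matter */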
2613       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2614       /* process values */
2615       for (i=0; i<count; i++) {
2616         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2617       }
2618     }
2619   }
2620 
2621   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2622   ierr = PetscFree(work);CHKERRQ(ierr);
2623   PetscFunctionReturn(0);
2624 }
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatSOR_MPISBAIJ"
2628 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2629 {
2630   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2631   PetscErrorCode    ierr;
2632   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2633   PetscScalar       *x,*ptr,*from;
2634   Vec               bb1;
2635   const PetscScalar *b;
2636 
2637   PetscFunctionBegin;
2638   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2639   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2640 
2641   if (flag == SOR_APPLY_UPPER) {
2642     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2643     PetscFunctionReturn(0);
2644   }
2645 
2646   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2647     if (flag & SOR_ZERO_INITIAL_GUESS) {
2648       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2649       its--;
2650     }
2651 
2652     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2653     while (its--) {
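      /* each iteration forms bb1 = bb - B*xx - B^T*xx, removing the contribution of the
         off-diagonal block B (stored upper triangle) and its implicit transpose, and then
         runs a local symmetric sweep with the diagonal block mat->A */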
2654 
2655       /* lower triangular part: slvec0b = - B^T*xx */
2656       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2657 
2658       /* copy xx into slvec0a */
2659       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2660       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2661       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2662       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2663 
2664       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2665 
2666       /* copy bb into slvec1a */
2667       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2668       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2669       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2670       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2671 
2672       /* set slvec1b = 0 */
2673       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2674 
2675       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2676       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2677       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2678       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2679 
2680       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B^T*xx - B*xx */
2681       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2682 
2683       /* local diagonal sweep */
2684       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2685     }
2686     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2687   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2688     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2689   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2690     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2691   } else if (flag & SOR_EISENSTAT) {
2692     Vec               xx1;
2693     PetscBool         hasop;
2694     const PetscScalar *diag;
2695     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2696     PetscInt          i,n;
2697 
2698     if (!mat->xx1) {
2699       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2700       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2701     }
2702     xx1 = mat->xx1;
2703     bb1 = mat->bb1;
2704 
2705     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2706 
2707     if (!mat->diag) {
2708       /* this is wrong for same matrix with new nonzero values */
2709       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2710       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2711     }
2712     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2713 
2714     if (hasop) {
2715       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* slvec1a = diag(A)*xx, combined with bb by the VecAYPX() below */
2716       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2717     } else {
2718       /*
2719           These two lines are replaced by code that may be a bit faster for a good compiler
2720       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2721       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2722       */
2723       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2724       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2725       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2726       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2727       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
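      /* scale = (omega-2.0)/omega equals -1 when omega == 1, so both branches compute
         sl = b - diag*x in that case; the omega == 1 branch just avoids a multiply */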
2728       if (omega == 1.0) {
2729         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2730         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2731       } else {
2732         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2733         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2734       }
2735       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2736       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2737       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2738       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2739     }
2740 
2741     /* multiply off-diagonal portion of matrix */
2742     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2743     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2744     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2745     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2746     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2747     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2748     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2749     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2750     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2751     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2752 
2753     /* local sweep */
2754     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2755     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2756   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2762 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2763 {
2764   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2765   PetscErrorCode ierr;
2766   Vec            lvec1,bb1;
2767 
2768   PetscFunctionBegin;
2769   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2770   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2771 
2772   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2773     if (flag & SOR_ZERO_INITIAL_GUESS) {
2774       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2775       its--;
2776     }
2777 
2778     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2779     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2780     while (its--) {
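      /* same algebra as in MatSOR_MPISBAIJ, but with two scatters on mat->Mvctx:
         bb1 = bb - B^T*xx (reverse scatter of lvec1) - B*xx (forward-scattered ghost
         values in mat->lvec), followed by a local symmetric sweep with mat->A */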
2781       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2782 
2783       /* lower triangular part: bb1 = bb - B^T*xx */
2784       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2785       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2786 
2787       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2788       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2789       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2790 
2791       /* upper triangular part: bb1 = bb1 - B*xx (using the ghosted values in mat->lvec) */
2792       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2793       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2794 
2795       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2796 
2797       /* diagonal sweep */
2798       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2799     }
2800     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2801     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2802   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 #undef __FUNCT__
2807 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2808 /*@
2809      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
2810          the local rows in standard CSR format.
2811 
2812    Collective on MPI_Comm
2813 
2814    Input Parameters:
2815 +  comm - MPI communicator
2816 .  bs - the block size, only a block size of 1 is supported
2817 .  m - number of local rows (cannot be PETSC_DECIDE)
2818 .  n - number of local columns; this should be the same as the local size used in creating the
2819        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2820        calculated if N is given). For square matrices n is almost always m.
2821 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2822 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2823 .   i - row offsets; i[k] gives the index into j of the first entry of local row k (i[0] must be 0)
2824 .   j - column indices for each stored entry
2825 -   a - matrix values
2826 
2827    Output Parameter:
2828 .   mat - the matrix
2829 
2830    Level: intermediate
2831 
2832    Notes:
2833        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2834      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2835      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2836 
2837        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
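
   Example (a sketch assuming that, as for other SBAIJ routines, only the upper triangular
   part of each row, including the diagonal, is supplied), for one process, bs = 1, and
   the symmetric matrix with diagonal entries 2, 3 and off-diagonal entry 1:
.vb
   Mat         A;
   PetscInt    i[] = {0,2,3};        /* row 0 stores two entries, row 1 stores one */
   PetscInt    j[] = {0,1,1};        /* upper triangular column indices */
   PetscScalar a[] = {2.0,1.0,3.0};
   ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve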
2838 
2839 .keywords: matrix, aij, compressed row, sparse, parallel
2840 
2841 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2842           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2843 @*/
2844 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2845 {
2846   PetscErrorCode ierr;
2847 
2848 
2849   PetscFunctionBegin;
2850   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2851   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2852   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2853   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2854   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2855   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 
2859 
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2862 /*@C
2863    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2864    (block compressed row, with only the upper triangular portion stored).
2865 
2866    Collective on MPI_Comm
2867 
2868    Input Parameters:
2869 +  B - the matrix
2870 .  bs - the block size
2871 .  i - the indices into j for the start of each local row (starts with zero)
2872 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2873 -  v - optional values in the matrix
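
   A minimal calling sketch (assuming CSR arrays i, j, v describing the locally owned
   block rows, with the usual SBAIJ upper triangular convention):
.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
.ve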
2874 
2875    Level: developer
2876 
2877 .keywords: matrix, aij, compressed row, sparse, parallel
2878 
2879 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2880 @*/
2881 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2882 {
2883   PetscErrorCode ierr;
2884 
2885   PetscFunctionBegin;
2886   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 #undef __FUNCT__
2891 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2892 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2893 {
2894   PetscErrorCode ierr;
2895   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2896   PetscInt       *indx;
2897   PetscScalar    *values;
2898 
2899   PetscFunctionBegin;
2900   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2901   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2902     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2903     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2904     PetscInt       *bindx,rmax=a->rmax,j;
2905 
2906     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2907     mbs = m/bs; Nbs = N/cbs;
2908     if (n == PETSC_DECIDE) {
2909       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2910     }
2911     /* Check sum(n) = Nbs */
2912     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2913     if (sum != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",sum,Nbs);
2914 
2915     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2916     rstart -= mbs;
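    /* MPI_Scan() returns the inclusive prefix sum of mbs over the ranks, so subtracting
       this rank's own mbs gives the global index of its first block row */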
2917 
2918     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2919     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
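    /* MAT_GETROW_UPPERTRIANGULAR allows MatGetRow() to be called on SBAIJ matrices;
       only the stored upper triangular part of each row is returned */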
2920     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2921     for (i=0; i<mbs; i++) {
2922       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2923       nnz = nnz/bs;
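      /* convert the point column indices returned by MatGetRow_SeqSBAIJ() into block indices */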
2924       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2925       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2926       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2927     }
2928     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2929     ierr = PetscFree(bindx);CHKERRQ(ierr);
2930 
2931     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2932     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2933     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2934     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2935     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2936     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2937   }
2938 
2939   /* numeric phase */
2940   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2941   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2942 
2943   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2944   for (i=0; i<m; i++) {
2945     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2946     Ii   = i + rstart;
2947     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2948     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2949   }
2950   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2951   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955