xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 8f46ffca725d709c802a056bffe944fc7c220922)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 #if defined(PETSC_HAVE_ELEMENTAL)
23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
24 #endif
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
28 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
29 {
30   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
35   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
53   { \
54  \
55     brow = row/bs;  \
56     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
57     rmax = aimax[brow]; nrow = ailen[brow]; \
58     bcol = col/bs; \
59     ridx = row % bs; cidx = col % bs; \
60     low  = 0; high = nrow; \
61     while (high-low > 3) { \
62       t = (low+high)/2; \
63       if (rp[t] > bcol) high = t; \
64       else              low  = t; \
65     } \
66     for (_i=low; _i<high; _i++) { \
67       if (rp[_i] > bcol) break; \
68       if (rp[_i] == bcol) { \
69         bap = ap + bs2*_i + bs*cidx + ridx; \
70         if (addv == ADD_VALUES) *bap += value;  \
71         else                    *bap  = value;  \
72         goto a_noinsert; \
73       } \
74     } \
75     if (a->nonew == 1) goto a_noinsert; \
76     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
77     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
78     N = nrow++ - 1;  \
79     /* shift up all the later entries in this row */ \
80     for (ii=N; ii>=_i; ii--) { \
81       rp[ii+1] = rp[ii]; \
82       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
83     } \
84     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
85     rp[_i]                      = bcol;  \
86     ap[bs2*_i + bs*cidx + ridx] = value;  \
87     A->nonzerostate++;\
88 a_noinsert:; \
89     ailen[brow] = nrow; \
90   }
91 
92 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
93   { \
94     brow = row/bs;  \
95     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
96     rmax = bimax[brow]; nrow = bilen[brow]; \
97     bcol = col/bs; \
98     ridx = row % bs; cidx = col % bs; \
99     low  = 0; high = nrow; \
100     while (high-low > 3) { \
101       t = (low+high)/2; \
102       if (rp[t] > bcol) high = t; \
103       else              low  = t; \
104     } \
105     for (_i=low; _i<high; _i++) { \
106       if (rp[_i] > bcol) break; \
107       if (rp[_i] == bcol) { \
108         bap = ap + bs2*_i + bs*cidx + ridx; \
109         if (addv == ADD_VALUES) *bap += value;  \
110         else                    *bap  = value;  \
111         goto b_noinsert; \
112       } \
113     } \
114     if (b->nonew == 1) goto b_noinsert; \
115     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
116     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
117     N = nrow++ - 1;  \
118     /* shift up all the later entries in this row */ \
119     for (ii=N; ii>=_i; ii--) { \
120       rp[ii+1] = rp[ii]; \
121       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122     } \
123     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
124     rp[_i]                      = bcol;  \
125     ap[bs2*_i + bs*cidx + ridx] = value;  \
126     B->nonzerostate++;\
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (!baij->donotstash) {
168     if (n > baij->n_loc) {
169       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
171       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
173 
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
192         }
193         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
194           col  = in[j] - cstart_orig;         /* local col index */
195           brow = row/bs; bcol = col/bs;
196           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
197           if (roworiented) value = v[i*n+j];
198           else             value = v[i+j*m];
199           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
200           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
201         } else if (in[j] < 0) continue;
202 #if defined(PETSC_USE_DEBUG)
203         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
204 #endif
205         else {  /* off-diag entry (B) */
206           if (mat->was_assembled) {
207             if (!baij->colmap) {
208               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
209             }
210 #if defined(PETSC_USE_CTABLE)
211             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
212             col  = col - 1;
213 #else
214             col = baij->colmap[in[j]/bs] - 1;
215 #endif
216             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
217               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
218               col  =  in[j];
219               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
220               B    = baij->B;
221               b    = (Mat_SeqBAIJ*)(B)->data;
222               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
223               ba   = b->a;
224             } else col += in[j]%bs;
225           } else col = in[j];
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc          = 0;
237         for (j=0; j<n; j++) {
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray     =baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++ = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend) {
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         } else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined(PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined(PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col  = in[j];
341             }
342           } else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend) {
379           col  = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined(PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data--;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatNorm_MPISBAIJ"
405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
406 {
407   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscReal      sum[2],*lnorm2;
410 
411   PetscFunctionBegin;
412   if (baij->size == 1) {
413     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
414   } else {
415     if (type == NORM_FROBENIUS) {
416       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
417       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
419       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
421       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
422       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
423       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
424     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
425       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
426       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
427       PetscReal    *rsum,*rsum2,vabs;
428       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
429       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
430       MatScalar    *v;
431 
432       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
433       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz   = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++) {
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++) {
442             for (row=0; row<bs; row++) {
443               vabs            = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++) {
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++) {
459             for (row=0; row<bs; row++) {
460               vabs            = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
473     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
483   PetscErrorCode ierr;
484   PetscInt       nstash,reallocs;
485   InsertMode     addv;
486 
487   PetscFunctionBegin;
488   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
489 
490   /* make sure all processors are either in INSERTMODE or ADDMODE */
491   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
492   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
493   mat->insertmode = addv; /* in case this processor had no cache */
494 
495   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
496   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
500   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
507 {
508   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
509   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
510   PetscErrorCode ierr;
511   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
512   PetscInt       *row,*col;
513   PetscBool      other_disassembled;
514   PetscMPIInt    n;
515   PetscBool      r1,r2,r3;
516   MatScalar      *val;
517   InsertMode     addv = mat->insertmode;
518 
519   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
520   PetscFunctionBegin;
521   if (!baij->donotstash &&  !mat->nooffprocentries) {
522     while (1) {
523       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
524       if (!flg) break;
525 
526       for (i=0; i<n;) {
527         /* Now identify the consecutive vals belonging to the same row */
528         for (j=i,rstart=row[j]; j<n; j++) {
529           if (row[j] != rstart) break;
530         }
531         if (j < n) ncols = j-i;
532         else       ncols = n-i;
533         /* Now assemble all these values with a single function call */
534         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
535         i    = j;
536       }
537     }
538     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
539     /* Now process the block-stash. Since the values are stashed column-oriented,
540        set the roworiented flag to column oriented, and after MatSetValues()
541        restore the original flags */
542     r1 = baij->roworiented;
543     r2 = a->roworiented;
544     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
545 
546     baij->roworiented = PETSC_FALSE;
547     a->roworiented    = PETSC_FALSE;
548 
549     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
550     while (1) {
551       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
552       if (!flg) break;
553 
554       for (i=0; i<n;) {
555         /* Now identify the consecutive vals belonging to the same row */
556         for (j=i,rstart=row[j]; j<n; j++) {
557           if (row[j] != rstart) break;
558         }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i    = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566 
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569 
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596 
597   baij->rowvalues = 0;
598 
599   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
600   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
601     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
602     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664     const char   *matname;
665 
666     /* Should this be the same type as mat? */
667     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
668     if (!rank) {
669       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
670     } else {
671       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
672     }
673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
674     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
675     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
676     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
677 
678     /* copy over the A part */
679     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
680     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
681     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
682 
683     for (i=0; i<mbs; i++) {
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = (baij->cstartbs+aj[j])*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     /* copy over the B part */
696     Bloc = (Mat_SeqBAIJ*)baij->B->data;
697     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
698     for (i=0; i<mbs; i++) {
699 
700       rvals[0] = bs*(baij->rstartbs + i);
701       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
702       for (j=ai[i]; j<ai[i+1]; j++) {
703         col = baij->garray[aj[j]]*bs;
704         for (k=0; k<bs; k++) {
705           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
706           col++;
707           a += bs;
708         }
709       }
710     }
711     ierr = PetscFree(rvals);CHKERRQ(ierr);
712     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
714     /*
715        Everyone has to call to draw the matrix since the graphics waits are
716        synchronized across all processors that share the PetscDraw object
717     */
718     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
722       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797 #if defined(PETSC_HAVE_ELEMENTAL)
798   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
799 #endif
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
806 {
807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
810   PetscScalar    *x,*from;
811 
812   PetscFunctionBegin;
813   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
814   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
815 
816   /* diagonal part */
817   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
818   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
819 
820   /* subdiagonal part */
821   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
822 
823   /* copy x into the vec slvec0 */
824   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
828   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830 
831   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833   /* supperdiagonal part */
834   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_MPISBAIJ"
840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
841 {
842   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
843   PetscErrorCode    ierr;
844   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
845   PetscScalar       *from;
846   const PetscScalar *x;
847 
848   PetscFunctionBegin;
849   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
850   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
851 
852   /* diagonal part */
853   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
854   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
855 
856   /* subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
858 
859   /* copy x into the vec slvec0 */
860   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
862 
863   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
864   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
865   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
866 
867   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   /* supperdiagonal part */
870   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
879   PetscErrorCode ierr;
880   PetscInt       nt;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885 
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
888 
889   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   /* do diagonal part */
891   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
892   /* do supperdiagonal part */
893   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
895   /* do subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
897   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
898   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode    ierr;
908   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
909   PetscScalar       *from,zero=0.0;
910   const PetscScalar *x;
911 
912   PetscFunctionBegin;
913   /*
914   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
915   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
916   */
917   /* diagonal part */
918   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
919   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
920 
921   /* subdiagonal part */
922   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
923 
924   /* copy x into the vec slvec0 */
925   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
926   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
927   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
928   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
929 
930   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
933 
934   /* supperdiagonal part */
935   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
942 {
943   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   /* do diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
950   /* do supperdiagonal part */
951   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
953 
954   /* do subdiagonal part */
955   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
956   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 /*
962   This only works correctly for square matrices where the subblock A->A is the
963    diagonal block
964 */
965 #undef __FUNCT__
966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
974   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatScale_MPISBAIJ"
980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
981 {
982   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
987   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatGetRow_MPISBAIJ"
993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
994 {
995   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
996   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
997   PetscErrorCode ierr;
998   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
999   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1000   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1001 
1002   PetscFunctionBegin;
1003   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1004   mat->getrowactive = PETSC_TRUE;
1005 
1006   if (!mat->rowvalues && (idx || v)) {
1007     /*
1008         allocate enough space to hold information from the longest row.
1009     */
1010     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1011     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1012     PetscInt     max = 1,mbs = mat->mbs,tmp;
1013     for (i=0; i<mbs; i++) {
1014       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1015       if (max < tmp) max = tmp;
1016     }
1017     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1018   }
1019 
1020   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1021   lrow = row - brstart;  /* local row index */
1022 
1023   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1024   if (!v)   {pvA = 0; pvB = 0;}
1025   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1026   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1027   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1028   nztot = nzA + nzB;
1029 
1030   cmap = mat->garray;
1031   if (v  || idx) {
1032     if (nztot) {
1033       /* Sort by increasing column numbers, assuming A and B already sorted */
1034       PetscInt imark = -1;
1035       if (v) {
1036         *v = v_p = mat->rowvalues;
1037         for (i=0; i<nzB; i++) {
1038           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1039           else break;
1040         }
1041         imark = i;
1042         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1043         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1044       }
1045       if (idx) {
1046         *idx = idx_p = mat->rowindices;
1047         if (imark > -1) {
1048           for (i=0; i<imark; i++) {
1049             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1050           }
1051         } else {
1052           for (i=0; i<nzB; i++) {
1053             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054             else break;
1055           }
1056           imark = i;
1057         }
1058         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1059         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1060       }
1061     } else {
1062       if (idx) *idx = 0;
1063       if (v)   *v   = 0;
1064     }
1065   }
1066   *nz  = nztot;
1067   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1068   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1080   baij->getrowactive = PETSC_FALSE;
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1089   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1090 
1091   PetscFunctionBegin;
1092   aA->getrow_utriangular = PETSC_TRUE;
1093   PetscFunctionReturn(0);
1094 }
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1098 {
1099   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1100   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1101 
1102   PetscFunctionBegin;
1103   aA->getrow_utriangular = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1116   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1129   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1135 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1136 {
1137   PetscErrorCode ierr;
1138   IS             iscol_local;
1139   PetscInt       csize;
1140 
1141   PetscFunctionBegin;
1142   MPI_Comm    comm;
1143   PetscMPIInt rank;
1144   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1145   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1146 
1147   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1148   if (call == MAT_REUSE_MATRIX) {
1149     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1150     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1151   } else {
1152     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1153 
1154     { /* check if isrow == iscol_local Note: isrow is distributed, but iscol_local is locally owned! */
1155     PetscInt  sz1,sz2,*a1,*a2,i,j,k,nmatch;
1156     const PetscInt *ptr1,*ptr2;
1157     ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1158     ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1159     if (sz1 > sz2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1160 
1161     ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1162     ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1163 
1164     ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1165     ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1166     ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1167     ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1168     ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1169     ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1170 
1171     nmatch=0;
1172     k = 0;
1173     for (i=0; i<sz1; i++){
1174       for (j=k; j<sz2; j++){
1175         if (a1[i] == a2[j]) {
1176           k = j; nmatch++;
1177           if (rank == 1) printf("match %d\n",a2[j]);
1178           break;
1179         }
1180       }
1181     }
1182     ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1183     ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1184     ierr = PetscFree(a1);CHKERRQ(ierr);
1185     ierr = PetscFree(a2);CHKERRQ(ierr);
1186     if (nmatch < sz1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1187     }
1188   }
1189 
1190   /* now call MatGetSubMatrix_MPIBAIJ() */
1191   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); //csize: global iscol size
1192   if (call == MAT_INITIAL_MATRIX) {
1193     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1194     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1195   }
1196   PetscFunctionReturn(0);
1197 }
1198 
1199 #undef __FUNCT__
1200 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1201 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1202 {
1203   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1204   PetscErrorCode ierr;
1205 
1206   PetscFunctionBegin;
1207   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1208   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1214 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1215 {
1216   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1217   Mat            A  = a->A,B = a->B;
1218   PetscErrorCode ierr;
1219   PetscReal      isend[5],irecv[5];
1220 
1221   PetscFunctionBegin;
1222   info->block_size = (PetscReal)matin->rmap->bs;
1223 
1224   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1225 
1226   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1227   isend[3] = info->memory;  isend[4] = info->mallocs;
1228 
1229   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1230 
1231   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1232   isend[3] += info->memory;  isend[4] += info->mallocs;
1233   if (flag == MAT_LOCAL) {
1234     info->nz_used      = isend[0];
1235     info->nz_allocated = isend[1];
1236     info->nz_unneeded  = isend[2];
1237     info->memory       = isend[3];
1238     info->mallocs      = isend[4];
1239   } else if (flag == MAT_GLOBAL_MAX) {
1240     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1241 
1242     info->nz_used      = irecv[0];
1243     info->nz_allocated = irecv[1];
1244     info->nz_unneeded  = irecv[2];
1245     info->memory       = irecv[3];
1246     info->mallocs      = irecv[4];
1247   } else if (flag == MAT_GLOBAL_SUM) {
1248     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1249 
1250     info->nz_used      = irecv[0];
1251     info->nz_allocated = irecv[1];
1252     info->nz_unneeded  = irecv[2];
1253     info->memory       = irecv[3];
1254     info->mallocs      = irecv[4];
1255   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1256   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1257   info->fill_ratio_needed = 0;
1258   info->factor_mallocs    = 0;
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1264 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1265 {
1266   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1267   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271   switch (op) {
1272   case MAT_NEW_NONZERO_LOCATIONS:
1273   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1274   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1275   case MAT_KEEP_NONZERO_PATTERN:
1276   case MAT_NEW_NONZERO_LOCATION_ERR:
1277     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1278     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1279     break;
1280   case MAT_ROW_ORIENTED:
1281     a->roworiented = flg;
1282 
1283     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1284     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1285     break;
1286   case MAT_NEW_DIAGONALS:
1287     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1288     break;
1289   case MAT_IGNORE_OFF_PROC_ENTRIES:
1290     a->donotstash = flg;
1291     break;
1292   case MAT_USE_HASH_TABLE:
1293     a->ht_flag = flg;
1294     break;
1295   case MAT_HERMITIAN:
1296     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1297     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1298 
1299     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1300     break;
1301   case MAT_SPD:
1302     A->spd_set = PETSC_TRUE;
1303     A->spd     = flg;
1304     if (flg) {
1305       A->symmetric                  = PETSC_TRUE;
1306       A->structurally_symmetric     = PETSC_TRUE;
1307       A->symmetric_set              = PETSC_TRUE;
1308       A->structurally_symmetric_set = PETSC_TRUE;
1309     }
1310     break;
1311   case MAT_SYMMETRIC:
1312     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1313     break;
1314   case MAT_STRUCTURALLY_SYMMETRIC:
1315     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1316     break;
1317   case MAT_SYMMETRY_ETERNAL:
1318     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1319     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1320     break;
1321   case MAT_IGNORE_LOWER_TRIANGULAR:
1322     aA->ignore_ltriangular = flg;
1323     break;
1324   case MAT_ERROR_LOWER_TRIANGULAR:
1325     aA->ignore_ltriangular = flg;
1326     break;
1327   case MAT_GETROW_UPPERTRIANGULAR:
1328     aA->getrow_utriangular = flg;
1329     break;
1330   default:
1331     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1332   }
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1338 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1339 {
1340   PetscErrorCode ierr;
1341 
1342   PetscFunctionBegin;
1343   if (MAT_INITIAL_MATRIX || *B != A) {
1344     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1345   }
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1351 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1352 {
1353   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1354   Mat            a     = baij->A, b=baij->B;
1355   PetscErrorCode ierr;
1356   PetscInt       nv,m,n;
1357   PetscBool      flg;
1358 
1359   PetscFunctionBegin;
1360   if (ll != rr) {
1361     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1362     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1363   }
1364   if (!ll) PetscFunctionReturn(0);
1365 
1366   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1367   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1368 
1369   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1370   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1371 
1372   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1373 
1374   /* left diagonalscale the off-diagonal part */
1375   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1376 
1377   /* scale the diagonal part */
1378   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1379 
1380   /* right diagonalscale the off-diagonal part */
1381   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1382   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1388 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1389 {
1390   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1391   PetscErrorCode ierr;
1392 
1393   PetscFunctionBegin;
1394   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatEqual_MPISBAIJ"
1402 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1403 {
1404   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1405   Mat            a,b,c,d;
1406   PetscBool      flg;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   a = matA->A; b = matA->B;
1411   c = matB->A; d = matB->B;
1412 
1413   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1414   if (flg) {
1415     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1416   }
1417   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatCopy_MPISBAIJ"
1423 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1424 {
1425   PetscErrorCode ierr;
1426   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1427   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1428 
1429   PetscFunctionBegin;
1430   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1431   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1432     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1433     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1434     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1435   } else {
1436     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1437     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1438   }
1439   PetscFunctionReturn(0);
1440 }
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1444 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1445 {
1446   PetscErrorCode ierr;
1447 
1448   PetscFunctionBegin;
1449   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1455 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1456 {
1457   PetscErrorCode ierr;
1458   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1459   PetscBLASInt   bnz,one=1;
1460   Mat_SeqSBAIJ   *xa,*ya;
1461   Mat_SeqBAIJ    *xb,*yb;
1462 
1463   PetscFunctionBegin;
1464   if (str == SAME_NONZERO_PATTERN) {
1465     PetscScalar alpha = a;
1466     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1467     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1468     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1469     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1470     xb   = (Mat_SeqBAIJ*)xx->B->data;
1471     yb   = (Mat_SeqBAIJ*)yy->B->data;
1472     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1473     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1474     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1475   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1476     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1477     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1478     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1479   } else {
1480     Mat      B;
1481     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1482     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1483     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1484     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1485     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1486     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1487     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1488     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1489     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1490     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1491     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1492     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1493     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1494     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1495     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1496     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1497     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1498     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1499     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1500     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1507 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1508 {
1509   PetscErrorCode ierr;
1510   PetscInt       i;
1511   PetscBool      flg;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1515   for (i=0; i<n; i++) {
1516     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1517     if (!flg) { /* *B[i] is non-symmetric, set flag */
1518       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1519     }
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 /* -------------------------------------------------------------------*/
1525 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1526                                        MatGetRow_MPISBAIJ,
1527                                        MatRestoreRow_MPISBAIJ,
1528                                        MatMult_MPISBAIJ,
1529                                /*  4*/ MatMultAdd_MPISBAIJ,
1530                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1531                                        MatMultAdd_MPISBAIJ,
1532                                        0,
1533                                        0,
1534                                        0,
1535                                /* 10*/ 0,
1536                                        0,
1537                                        0,
1538                                        MatSOR_MPISBAIJ,
1539                                        MatTranspose_MPISBAIJ,
1540                                /* 15*/ MatGetInfo_MPISBAIJ,
1541                                        MatEqual_MPISBAIJ,
1542                                        MatGetDiagonal_MPISBAIJ,
1543                                        MatDiagonalScale_MPISBAIJ,
1544                                        MatNorm_MPISBAIJ,
1545                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1546                                        MatAssemblyEnd_MPISBAIJ,
1547                                        MatSetOption_MPISBAIJ,
1548                                        MatZeroEntries_MPISBAIJ,
1549                                /* 24*/ 0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                /* 29*/ MatSetUp_MPISBAIJ,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /* 34*/ MatDuplicate_MPISBAIJ,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /* 39*/ MatAXPY_MPISBAIJ,
1565                                        MatGetSubMatrices_MPISBAIJ,
1566                                        MatIncreaseOverlap_MPISBAIJ,
1567                                        MatGetValues_MPISBAIJ,
1568                                        MatCopy_MPISBAIJ,
1569                                /* 44*/ 0,
1570                                        MatScale_MPISBAIJ,
1571                                        0,
1572                                        0,
1573                                        0,
1574                                /* 49*/ 0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                        0,
1579                                /* 54*/ 0,
1580                                        0,
1581                                        MatSetUnfactored_MPISBAIJ,
1582                                        0,
1583                                        MatSetValuesBlocked_MPISBAIJ,
1584                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1585                                        0,
1586                                        0,
1587                                        0,
1588                                        0,
1589                                /* 64*/ 0,
1590                                        0,
1591                                        0,
1592                                        0,
1593                                        0,
1594                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1595                                        0,
1596                                        0,
1597                                        0,
1598                                        0,
1599                                /* 74*/ 0,
1600                                        0,
1601                                        0,
1602                                        0,
1603                                        0,
1604                                /* 79*/ 0,
1605                                        0,
1606                                        0,
1607                                        0,
1608                                        MatLoad_MPISBAIJ,
1609                                /* 84*/ 0,
1610                                        0,
1611                                        0,
1612                                        0,
1613                                        0,
1614                                /* 89*/ 0,
1615                                        0,
1616                                        0,
1617                                        0,
1618                                        0,
1619                                /* 94*/ 0,
1620                                        0,
1621                                        0,
1622                                        0,
1623                                        0,
1624                                /* 99*/ 0,
1625                                        0,
1626                                        0,
1627                                        0,
1628                                        0,
1629                                /*104*/ 0,
1630                                        MatRealPart_MPISBAIJ,
1631                                        MatImaginaryPart_MPISBAIJ,
1632                                        MatGetRowUpperTriangular_MPISBAIJ,
1633                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1634                                /*109*/ 0,
1635                                        0,
1636                                        0,
1637                                        0,
1638                                        0,
1639                                /*114*/ 0,
1640                                        0,
1641                                        0,
1642                                        0,
1643                                        0,
1644                                /*119*/ 0,
1645                                        0,
1646                                        0,
1647                                        0,
1648                                        0,
1649                                /*124*/ 0,
1650                                        0,
1651                                        0,
1652                                        0,
1653                                        0,
1654                                /*129*/ 0,
1655                                        0,
1656                                        0,
1657                                        0,
1658                                        0,
1659                                /*134*/ 0,
1660                                        0,
1661                                        0,
1662                                        0,
1663                                        0,
1664                                /*139*/ 0,
1665                                        0,
1666                                        0,
1667                                        0,
1668                                        0,
1669                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1670 };
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1674 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1675 {
1676   PetscFunctionBegin;
1677   *a = ((Mat_MPISBAIJ*)A->data)->A;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1683 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1684 {
1685   Mat_MPISBAIJ   *b;
1686   PetscErrorCode ierr;
1687   PetscInt       i,mbs,Mbs;
1688 
1689   PetscFunctionBegin;
1690   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1691   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1692   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1693   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1694 
1695   b   = (Mat_MPISBAIJ*)B->data;
1696   mbs = B->rmap->n/bs;
1697   Mbs = B->rmap->N/bs;
1698   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1699 
1700   B->rmap->bs = bs;
1701   b->bs2      = bs*bs;
1702   b->mbs      = mbs;
1703   b->Mbs      = Mbs;
1704   b->nbs      = B->cmap->n/bs;
1705   b->Nbs      = B->cmap->N/bs;
1706 
1707   for (i=0; i<=b->size; i++) {
1708     b->rangebs[i] = B->rmap->range[i]/bs;
1709   }
1710   b->rstartbs = B->rmap->rstart/bs;
1711   b->rendbs   = B->rmap->rend/bs;
1712 
1713   b->cstartbs = B->cmap->rstart/bs;
1714   b->cendbs   = B->cmap->rend/bs;
1715 
1716   if (!B->preallocated) {
1717     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1718     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1719     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1720     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1721     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1722     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1723     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1724     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1725     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1726   }
1727 
1728   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1729   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1730 
1731   B->preallocated = PETSC_TRUE;
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1737 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1738 {
1739   PetscInt       m,rstart,cstart,cend;
1740   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1741   const PetscInt *JJ    =0;
1742   PetscScalar    *values=0;
1743   PetscErrorCode ierr;
1744 
1745   PetscFunctionBegin;
1746   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1747   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1748   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1749   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1750   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1751   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1752   m      = B->rmap->n/bs;
1753   rstart = B->rmap->rstart/bs;
1754   cstart = B->cmap->rstart/bs;
1755   cend   = B->cmap->rend/bs;
1756 
1757   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1758   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1759   for (i=0; i<m; i++) {
1760     nz = ii[i+1] - ii[i];
1761     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1762     nz_max = PetscMax(nz_max,nz);
1763     JJ     = jj + ii[i];
1764     for (j=0; j<nz; j++) {
1765       if (*JJ >= cstart) break;
1766       JJ++;
1767     }
1768     d = 0;
1769     for (; j<nz; j++) {
1770       if (*JJ++ >= cend) break;
1771       d++;
1772     }
1773     d_nnz[i] = d;
1774     o_nnz[i] = nz - d;
1775   }
1776   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1777   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1778 
1779   values = (PetscScalar*)V;
1780   if (!values) {
1781     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1782     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1783   }
1784   for (i=0; i<m; i++) {
1785     PetscInt          row    = i + rstart;
1786     PetscInt          ncols  = ii[i+1] - ii[i];
1787     const PetscInt    *icols = jj + ii[i];
1788     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1789     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1790   }
1791 
1792   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1793   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1794   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1796   PetscFunctionReturn(0);
1797 }
1798 
1799 /*MC
1800    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1801    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1802    the matrix is stored.
1803 
1804   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1805   can call MatSetOption(Mat, MAT_HERMITIAN);
1806 
1807    Options Database Keys:
1808 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1809 
1810   Level: beginner
1811 
1812 .seealso: MatCreateMPISBAIJ
1813 M*/
1814 
1815 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatCreate_MPISBAIJ"
1819 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1820 {
1821   Mat_MPISBAIJ   *b;
1822   PetscErrorCode ierr;
1823   PetscBool      flg = PETSC_FALSE;
1824 
1825   PetscFunctionBegin;
1826   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1827   B->data = (void*)b;
1828   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1829 
1830   B->ops->destroy = MatDestroy_MPISBAIJ;
1831   B->ops->view    = MatView_MPISBAIJ;
1832   B->assembled    = PETSC_FALSE;
1833   B->insertmode   = NOT_SET_VALUES;
1834 
1835   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1836   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1837 
1838   /* build local table of row and column ownerships */
1839   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1840 
1841   /* build cache for off array entries formed */
1842   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1843 
1844   b->donotstash  = PETSC_FALSE;
1845   b->colmap      = NULL;
1846   b->garray      = NULL;
1847   b->roworiented = PETSC_TRUE;
1848 
1849   /* stuff used in block assembly */
1850   b->barray = 0;
1851 
1852   /* stuff used for matrix vector multiply */
1853   b->lvec    = 0;
1854   b->Mvctx   = 0;
1855   b->slvec0  = 0;
1856   b->slvec0b = 0;
1857   b->slvec1  = 0;
1858   b->slvec1a = 0;
1859   b->slvec1b = 0;
1860   b->sMvctx  = 0;
1861 
1862   /* stuff for MatGetRow() */
1863   b->rowindices   = 0;
1864   b->rowvalues    = 0;
1865   b->getrowactive = PETSC_FALSE;
1866 
1867   /* hash table stuff */
1868   b->ht           = 0;
1869   b->hd           = 0;
1870   b->ht_size      = 0;
1871   b->ht_flag      = PETSC_FALSE;
1872   b->ht_fact      = 0;
1873   b->ht_total_ct  = 0;
1874   b->ht_insert_ct = 0;
1875 
1876   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1877   b->ijonly = PETSC_FALSE;
1878 
1879   b->in_loc = 0;
1880   b->v_loc  = 0;
1881   b->n_loc  = 0;
1882 
1883   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1884   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1885   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1886   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1887   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1888   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1889 #if defined(PETSC_HAVE_ELEMENTAL)
1890   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1891 #endif
1892 
1893   B->symmetric                  = PETSC_TRUE;
1894   B->structurally_symmetric     = PETSC_TRUE;
1895   B->symmetric_set              = PETSC_TRUE;
1896   B->structurally_symmetric_set = PETSC_TRUE;
1897 
1898   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1899   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1900   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1901   if (flg) {
1902     PetscReal fact = 1.39;
1903     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1904     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1905     if (fact <= 1.0) fact = 1.39;
1906     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1907     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1908   }
1909   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 /*MC
1914    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1915 
1916    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1917    and MATMPISBAIJ otherwise.
1918 
1919    Options Database Keys:
1920 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1921 
1922   Level: beginner
1923 
1924 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1925 M*/
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1929 /*@C
1930    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1931    the user should preallocate the matrix storage by setting the parameters
1932    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1933    performance can be increased by more than a factor of 50.
1934 
1935    Collective on Mat
1936 
1937    Input Parameters:
1938 +  B - the matrix
1939 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1940           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1941 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1942            submatrix  (same for all local rows)
1943 .  d_nnz - array containing the number of block nonzeros in the various block rows
1944            in the upper triangular and diagonal part of the in diagonal portion of the local
1945            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1946            for the diagonal entry and set a value even if it is zero.
1947 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1948            submatrix (same for all local rows).
1949 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1950            off-diagonal portion of the local submatrix that is right of the diagonal
1951            (possibly different for each block row) or NULL.
1952 
1953 
1954    Options Database Keys:
1955 .   -mat_no_unroll - uses code that does not unroll the loops in the
1956                      block calculations (much slower)
1957 .   -mat_block_size - size of the blocks to use
1958 
1959    Notes:
1960 
1961    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1962    than it must be used on all processors that share the object for that argument.
1963 
1964    If the *_nnz parameter is given then the *_nz parameter is ignored
1965 
1966    Storage Information:
1967    For a square global matrix we define each processor's diagonal portion
1968    to be its local rows and the corresponding columns (a square submatrix);
1969    each processor's off-diagonal portion encompasses the remainder of the
1970    local matrix (a rectangular submatrix).
1971 
1972    The user can specify preallocated storage for the diagonal part of
1973    the local submatrix with either d_nz or d_nnz (not both).  Set
1974    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1975    memory allocation.  Likewise, specify preallocated storage for the
1976    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1977 
1978    You can call MatGetInfo() to get information on how effective the preallocation was;
1979    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1980    You can also run with the option -info and look for messages with the string
1981    malloc in them to see if additional memory allocation was needed.
1982 
1983    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1984    the figure below we depict these three local rows and all columns (0-11).
1985 
1986 .vb
1987            0 1 2 3 4 5 6 7 8 9 10 11
1988           --------------------------
1989    row 3  |. . . d d d o o o o  o  o
1990    row 4  |. . . d d d o o o o  o  o
1991    row 5  |. . . d d d o o o o  o  o
1992           --------------------------
1993 .ve
1994 
1995    Thus, any entries in the d locations are stored in the d (diagonal)
1996    submatrix, and any entries in the o locations are stored in the
1997    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1998    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1999 
2000    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2001    plus the diagonal part of the d matrix,
2002    and o_nz should indicate the number of block nonzeros per row in the o matrix
2003 
2004    In general, for PDE problems in which most nonzeros are near the diagonal,
2005    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2006    or you will get TERRIBLE performance; see the users' manual chapter on
2007    matrices.
2008 
2009    Level: intermediate
2010 
2011 .keywords: matrix, block, aij, compressed row, sparse, parallel
2012 
2013 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2014 @*/
2015 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2016 {
2017   PetscErrorCode ierr;
2018 
2019   PetscFunctionBegin;
2020   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2021   PetscValidType(B,1);
2022   PetscValidLogicalCollectiveInt(B,bs,2);
2023   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatCreateSBAIJ"
2029 /*@C
2030    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2031    (block compressed row).  For good matrix assembly performance
2032    the user should preallocate the matrix storage by setting the parameters
2033    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2034    performance can be increased by more than a factor of 50.
2035 
2036    Collective on MPI_Comm
2037 
2038    Input Parameters:
2039 +  comm - MPI communicator
2040 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2041           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2042 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2043            This value should be the same as the local size used in creating the
2044            y vector for the matrix-vector product y = Ax.
2045 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2046            This value should be the same as the local size used in creating the
2047            x vector for the matrix-vector product y = Ax.
2048 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2049 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2050 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2051            submatrix  (same for all local rows)
2052 .  d_nnz - array containing the number of block nonzeros in the various block rows
2053            in the upper triangular portion of the in diagonal portion of the local
2054            (possibly different for each block block row) or NULL.
2055            If you plan to factor the matrix you must leave room for the diagonal entry and
2056            set its value even if it is zero.
2057 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2058            submatrix (same for all local rows).
2059 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2060            off-diagonal portion of the local submatrix (possibly different for
2061            each block row) or NULL.
2062 
2063    Output Parameter:
2064 .  A - the matrix
2065 
2066    Options Database Keys:
2067 .   -mat_no_unroll - uses code that does not unroll the loops in the
2068                      block calculations (much slower)
2069 .   -mat_block_size - size of the blocks to use
2070 .   -mat_mpi - use the parallel matrix data structures even on one processor
2071                (defaults to using SeqBAIJ format on one processor)
2072 
2073    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2074    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2075    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2076 
2077    Notes:
2078    The number of rows and columns must be divisible by blocksize.
2079    This matrix type does not support complex Hermitian operation.
2080 
2081    The user MUST specify either the local or global matrix dimensions
2082    (possibly both).
2083 
2084    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2085    than it must be used on all processors that share the object for that argument.
2086 
2087    If the *_nnz parameter is given then the *_nz parameter is ignored
2088 
2089    Storage Information:
2090    For a square global matrix we define each processor's diagonal portion
2091    to be its local rows and the corresponding columns (a square submatrix);
2092    each processor's off-diagonal portion encompasses the remainder of the
2093    local matrix (a rectangular submatrix).
2094 
2095    The user can specify preallocated storage for the diagonal part of
2096    the local submatrix with either d_nz or d_nnz (not both).  Set
2097    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2098    memory allocation.  Likewise, specify preallocated storage for the
2099    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2100 
2101    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2102    the figure below we depict these three local rows and all columns (0-11).
2103 
2104 .vb
2105            0 1 2 3 4 5 6 7 8 9 10 11
2106           --------------------------
2107    row 3  |. . . d d d o o o o  o  o
2108    row 4  |. . . d d d o o o o  o  o
2109    row 5  |. . . d d d o o o o  o  o
2110           --------------------------
2111 .ve
2112 
2113    Thus, any entries in the d locations are stored in the d (diagonal)
2114    submatrix, and any entries in the o locations are stored in the
2115    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2116    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2117 
2118    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2119    plus the diagonal part of the d matrix,
2120    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2121    In general, for PDE problems in which most nonzeros are near the diagonal,
2122    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2123    or you will get TERRIBLE performance; see the users' manual chapter on
2124    matrices.
2125 
2126    Level: intermediate
2127 
2128 .keywords: matrix, block, aij, compressed row, sparse, parallel
2129 
2130 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2131 @*/
2132 
2133 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2134 {
2135   PetscErrorCode ierr;
2136   PetscMPIInt    size;
2137 
2138   PetscFunctionBegin;
2139   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2140   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2141   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2142   if (size > 1) {
2143     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2144     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2145   } else {
2146     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2147     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2155 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2156 {
2157   Mat            mat;
2158   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2159   PetscErrorCode ierr;
2160   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2161   PetscScalar    *array;
2162 
2163   PetscFunctionBegin;
2164   *newmat = 0;
2165 
2166   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2167   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2168   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2169   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2170   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2171   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2172 
2173   mat->factortype   = matin->factortype;
2174   mat->preallocated = PETSC_TRUE;
2175   mat->assembled    = PETSC_TRUE;
2176   mat->insertmode   = NOT_SET_VALUES;
2177 
2178   a      = (Mat_MPISBAIJ*)mat->data;
2179   a->bs2 = oldmat->bs2;
2180   a->mbs = oldmat->mbs;
2181   a->nbs = oldmat->nbs;
2182   a->Mbs = oldmat->Mbs;
2183   a->Nbs = oldmat->Nbs;
2184 
2185 
2186   a->size         = oldmat->size;
2187   a->rank         = oldmat->rank;
2188   a->donotstash   = oldmat->donotstash;
2189   a->roworiented  = oldmat->roworiented;
2190   a->rowindices   = 0;
2191   a->rowvalues    = 0;
2192   a->getrowactive = PETSC_FALSE;
2193   a->barray       = 0;
2194   a->rstartbs     = oldmat->rstartbs;
2195   a->rendbs       = oldmat->rendbs;
2196   a->cstartbs     = oldmat->cstartbs;
2197   a->cendbs       = oldmat->cendbs;
2198 
2199   /* hash table stuff */
2200   a->ht           = 0;
2201   a->hd           = 0;
2202   a->ht_size      = 0;
2203   a->ht_flag      = oldmat->ht_flag;
2204   a->ht_fact      = oldmat->ht_fact;
2205   a->ht_total_ct  = 0;
2206   a->ht_insert_ct = 0;
2207 
2208   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2209   if (oldmat->colmap) {
2210 #if defined(PETSC_USE_CTABLE)
2211     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2212 #else
2213     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2214     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2215     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2216 #endif
2217   } else a->colmap = 0;
2218 
2219   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2220     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2221     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2222     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2223   } else a->garray = 0;
2224 
2225   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2226   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2227   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2228   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2229   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2230 
2231   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2232   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2233   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2234   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2235 
2236   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2237   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2238   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2239   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2240   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2241   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2242   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2243   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2244   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2245   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2246   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2247   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2248   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2249 
2250   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2251   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2252   a->sMvctx = oldmat->sMvctx;
2253   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2254 
2255   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2256   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2257   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2258   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2259   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2260   *newmat = mat;
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatLoad_MPISBAIJ"
2266 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2267 {
2268   PetscErrorCode ierr;
2269   PetscInt       i,nz,j,rstart,rend;
2270   PetscScalar    *vals,*buf;
2271   MPI_Comm       comm;
2272   MPI_Status     status;
2273   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2274   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2275   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2276   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2277   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2278   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2279   int            fd;
2280 
2281   PetscFunctionBegin;
2282   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2283   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2284   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2285   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2286   if (bs < 0) bs = 1;
2287 
2288   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2289   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2290   if (!rank) {
2291     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2292     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2293     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2294     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2295   }
2296 
2297   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2298 
2299   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2300   M    = header[1];
2301   N    = header[2];
2302 
2303   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2304   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2305   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2306 
2307   /* If global sizes are set, check if they are consistent with that given in the file */
2308   if (sizesset) {
2309     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2310   }
2311   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2312   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2313 
2314   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2315 
2316   /*
2317      This code adds extra rows to make sure the number of rows is
2318      divisible by the blocksize
2319   */
2320   Mbs        = M/bs;
2321   extra_rows = bs - M + bs*(Mbs);
2322   if (extra_rows == bs) extra_rows = 0;
2323   else                  Mbs++;
2324   if (extra_rows &&!rank) {
2325     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2326   }
2327 
2328   /* determine ownership of all rows */
2329   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2330     mbs = Mbs/size + ((Mbs % size) > rank);
2331     m   = mbs*bs;
2332   } else { /* User Set */
2333     m   = newmat->rmap->n;
2334     mbs = m/bs;
2335   }
2336   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2337   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2338   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2339   rowners[0] = 0;
2340   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2341   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2342   rstart = rowners[rank];
2343   rend   = rowners[rank+1];
2344 
2345   /* distribute row lengths to all processors */
2346   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2347   if (!rank) {
2348     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2349     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2350     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2351     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2352     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2353     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2354     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2355   } else {
2356     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2357   }
2358 
2359   if (!rank) {   /* procs[0] */
2360     /* calculate the number of nonzeros on each processor */
2361     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2362     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2363     for (i=0; i<size; i++) {
2364       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2365         procsnz[i] += rowlengths[j];
2366       }
2367     }
2368     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2369 
2370     /* determine max buffer needed and allocate it */
2371     maxnz = 0;
2372     for (i=0; i<size; i++) {
2373       maxnz = PetscMax(maxnz,procsnz[i]);
2374     }
2375     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2376 
2377     /* read in my part of the matrix column indices  */
2378     nz     = procsnz[0];
2379     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2380     mycols = ibuf;
2381     if (size == 1) nz -= extra_rows;
2382     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2383     if (size == 1) {
2384       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2385     }
2386 
2387     /* read in every ones (except the last) and ship off */
2388     for (i=1; i<size-1; i++) {
2389       nz   = procsnz[i];
2390       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2391       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2392     }
2393     /* read in the stuff for the last proc */
2394     if (size != 1) {
2395       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2396       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2397       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2398       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2399     }
2400     ierr = PetscFree(cols);CHKERRQ(ierr);
2401   } else {  /* procs[i], i>0 */
2402     /* determine buffer space needed for message */
2403     nz = 0;
2404     for (i=0; i<m; i++) nz += locrowlens[i];
2405     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2406     mycols = ibuf;
2407     /* receive message of column indices*/
2408     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2409     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2410     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2411   }
2412 
2413   /* loop over local rows, determining number of off diagonal entries */
2414   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2415   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2416   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2417   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2418   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2419   rowcount = 0;
2420   nzcount  = 0;
2421   for (i=0; i<mbs; i++) {
2422     dcount  = 0;
2423     odcount = 0;
2424     for (j=0; j<bs; j++) {
2425       kmax = locrowlens[rowcount];
2426       for (k=0; k<kmax; k++) {
2427         tmp = mycols[nzcount++]/bs; /* block col. index */
2428         if (!mask[tmp]) {
2429           mask[tmp] = 1;
2430           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2431           else masked1[dcount++] = tmp; /* entry in diag portion */
2432         }
2433       }
2434       rowcount++;
2435     }
2436 
2437     dlens[i]  = dcount;  /* d_nzz[i] */
2438     odlens[i] = odcount; /* o_nzz[i] */
2439 
2440     /* zero out the mask elements we set */
2441     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2442     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2443   }
2444   if (!sizesset) {
2445     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2446   }
2447   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2448   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2449 
2450   if (!rank) {
2451     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2452     /* read in my part of the matrix numerical values  */
2453     nz     = procsnz[0];
2454     vals   = buf;
2455     mycols = ibuf;
2456     if (size == 1) nz -= extra_rows;
2457     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2458     if (size == 1) {
2459       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2460     }
2461 
2462     /* insert into matrix */
2463     jj = rstart*bs;
2464     for (i=0; i<m; i++) {
2465       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2466       mycols += locrowlens[i];
2467       vals   += locrowlens[i];
2468       jj++;
2469     }
2470 
2471     /* read in other processors (except the last one) and ship out */
2472     for (i=1; i<size-1; i++) {
2473       nz   = procsnz[i];
2474       vals = buf;
2475       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2476       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2477     }
2478     /* the last proc */
2479     if (size != 1) {
2480       nz   = procsnz[i] - extra_rows;
2481       vals = buf;
2482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2483       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2484       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2485     }
2486     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2487 
2488   } else {
2489     /* receive numeric values */
2490     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2491 
2492     /* receive message of values*/
2493     vals   = buf;
2494     mycols = ibuf;
2495     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2496     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2497     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2498 
2499     /* insert into matrix */
2500     jj = rstart*bs;
2501     for (i=0; i<m; i++) {
2502       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2503       mycols += locrowlens[i];
2504       vals   += locrowlens[i];
2505       jj++;
2506     }
2507   }
2508 
2509   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2510   ierr = PetscFree(buf);CHKERRQ(ierr);
2511   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2512   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2513   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2514   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2515   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2516   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2522 /*XXXXX@
2523    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2524 
2525    Input Parameters:
2526 .  mat  - the matrix
2527 .  fact - factor
2528 
2529    Not Collective on Mat, each process can have a different hash factor
2530 
2531    Level: advanced
2532 
2533   Notes:
2534    This can also be set by the command line option: -mat_use_hash_table fact
2535 
2536 .keywords: matrix, hashtable, factor, HT
2537 
2538 .seealso: MatSetOption()
2539 @XXXXX*/
2540 
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2544 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2545 {
2546   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2547   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2548   PetscReal      atmp;
2549   PetscReal      *work,*svalues,*rvalues;
2550   PetscErrorCode ierr;
2551   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2552   PetscMPIInt    rank,size;
2553   PetscInt       *rowners_bs,dest,count,source;
2554   PetscScalar    *va;
2555   MatScalar      *ba;
2556   MPI_Status     stat;
2557 
2558   PetscFunctionBegin;
2559   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2560   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2561   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2562 
2563   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2564   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2565 
2566   bs  = A->rmap->bs;
2567   mbs = a->mbs;
2568   Mbs = a->Mbs;
2569   ba  = b->a;
2570   bi  = b->i;
2571   bj  = b->j;
2572 
2573   /* find ownerships */
2574   rowners_bs = A->rmap->range;
2575 
2576   /* each proc creates an array to be distributed */
2577   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2578   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2579 
2580   /* row_max for B */
2581   if (rank != size-1) {
2582     for (i=0; i<mbs; i++) {
2583       ncols = bi[1] - bi[0]; bi++;
2584       brow  = bs*i;
2585       for (j=0; j<ncols; j++) {
2586         bcol = bs*(*bj);
2587         for (kcol=0; kcol<bs; kcol++) {
2588           col  = bcol + kcol;                /* local col index */
2589           col += rowners_bs[rank+1];      /* global col index */
2590           for (krow=0; krow<bs; krow++) {
2591             atmp = PetscAbsScalar(*ba); ba++;
2592             row  = brow + krow;   /* local row index */
2593             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2594             if (work[col] < atmp) work[col] = atmp;
2595           }
2596         }
2597         bj++;
2598       }
2599     }
2600 
2601     /* send values to its owners */
2602     for (dest=rank+1; dest<size; dest++) {
2603       svalues = work + rowners_bs[dest];
2604       count   = rowners_bs[dest+1]-rowners_bs[dest];
2605       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2606     }
2607   }
2608 
2609   /* receive values */
2610   if (rank) {
2611     rvalues = work;
2612     count   = rowners_bs[rank+1]-rowners_bs[rank];
2613     for (source=0; source<rank; source++) {
2614       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2615       /* process values */
2616       for (i=0; i<count; i++) {
2617         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2618       }
2619     }
2620   }
2621 
2622   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2623   ierr = PetscFree(work);CHKERRQ(ierr);
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatSOR_MPISBAIJ"
2629 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2630 {
2631   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2632   PetscErrorCode    ierr;
2633   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2634   PetscScalar       *x,*ptr,*from;
2635   Vec               bb1;
2636   const PetscScalar *b;
2637 
2638   PetscFunctionBegin;
2639   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2640   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2641 
2642   if (flag == SOR_APPLY_UPPER) {
2643     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2644     PetscFunctionReturn(0);
2645   }
2646 
2647   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2648     if (flag & SOR_ZERO_INITIAL_GUESS) {
2649       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2650       its--;
2651     }
2652 
2653     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2654     while (its--) {
2655 
2656       /* lower triangular part: slvec0b = - B^T*xx */
2657       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2658 
2659       /* copy xx into slvec0a */
2660       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2661       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2662       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2663       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2664 
2665       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2666 
2667       /* copy bb into slvec1a */
2668       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2669       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2670       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2671       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2672 
2673       /* set slvec1b = 0 */
2674       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2675 
2676       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2677       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2678       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2679       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2680 
2681       /* upper triangular part: bb1 = bb1 - B*x */
2682       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2683 
2684       /* local diagonal sweep */
2685       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2686     }
2687     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2688   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2689     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2690   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692   } else if (flag & SOR_EISENSTAT) {
2693     Vec               xx1;
2694     PetscBool         hasop;
2695     const PetscScalar *diag;
2696     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2697     PetscInt          i,n;
2698 
2699     if (!mat->xx1) {
2700       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2701       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2702     }
2703     xx1 = mat->xx1;
2704     bb1 = mat->bb1;
2705 
2706     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2707 
2708     if (!mat->diag) {
2709       /* this is wrong for same matrix with new nonzero values */
2710       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2711       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2712     }
2713     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2714 
2715     if (hasop) {
2716       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2717       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2718     } else {
2719       /*
2720           These two lines are replaced by code that may be a bit faster for a good compiler
2721       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2722       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2723       */
2724       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2725       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2726       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2727       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2728       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2729       if (omega == 1.0) {
2730         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2731         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2732       } else {
2733         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2734         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2735       }
2736       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2737       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2738       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2739       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2740     }
2741 
2742     /* multiply off-diagonal portion of matrix */
2743     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2744     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2745     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2746     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2747     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2748     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2749     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2750     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2751     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2752     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2753 
2754     /* local sweep */
2755     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2756     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2757   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2763 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2764 {
2765   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2766   PetscErrorCode ierr;
2767   Vec            lvec1,bb1;
2768 
2769   PetscFunctionBegin;
2770   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2771   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2772 
2773   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2774     if (flag & SOR_ZERO_INITIAL_GUESS) {
2775       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2776       its--;
2777     }
2778 
2779     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2780     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2781     while (its--) {
2782       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2783 
2784       /* lower diagonal part: bb1 = bb - B^T*xx */
2785       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2786       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2787 
2788       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2789       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2790       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2791 
2792       /* upper diagonal part: bb1 = bb1 - B*x */
2793       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2794       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2795 
2796       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2797 
2798       /* diagonal sweep */
2799       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2800     }
2801     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2802     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2803   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 #undef __FUNCT__
2808 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2809 /*@
2810      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2811          CSR format the local rows.
2812 
2813    Collective on MPI_Comm
2814 
2815    Input Parameters:
2816 +  comm - MPI communicator
2817 .  bs - the block size, only a block size of 1 is supported
2818 .  m - number of local rows (Cannot be PETSC_DECIDE)
2819 .  n - This value should be the same as the local size used in creating the
2820        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2821        calculated if N is given) For square matrices n is almost always m.
2822 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2823 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2824 .   i - row indices
2825 .   j - column indices
2826 -   a - matrix values
2827 
2828    Output Parameter:
2829 .   mat - the matrix
2830 
2831    Level: intermediate
2832 
2833    Notes:
2834        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2835      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2836      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2837 
2838        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2839 
2840 .keywords: matrix, aij, compressed row, sparse, parallel
2841 
2842 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2843           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2844 @*/
2845 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2846 {
2847   PetscErrorCode ierr;
2848 
2849 
2850   PetscFunctionBegin;
2851   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2852   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2853   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2854   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2855   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2856   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2857   PetscFunctionReturn(0);
2858 }
2859 
2860 
2861 #undef __FUNCT__
2862 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2863 /*@C
2864    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2865    (the default parallel PETSc format).
2866 
2867    Collective on MPI_Comm
2868 
2869    Input Parameters:
2870 +  B - the matrix
2871 .  bs - the block size
2872 .  i - the indices into j for the start of each local row (starts with zero)
2873 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2874 -  v - optional values in the matrix
2875 
2876    Level: developer
2877 
2878 .keywords: matrix, aij, compressed row, sparse, parallel
2879 
2880 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2881 @*/
2882 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2883 {
2884   PetscErrorCode ierr;
2885 
2886   PetscFunctionBegin;
2887   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2893 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2894 {
2895   PetscErrorCode ierr;
2896   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2897   PetscInt       *indx;
2898   PetscScalar    *values;
2899 
2900   PetscFunctionBegin;
2901   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2902   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2903     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2904     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2905     PetscInt       *bindx,rmax=a->rmax,j;
2906 
2907     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2908     mbs = m/bs; Nbs = N/cbs;
2909     if (n == PETSC_DECIDE) {
2910       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2911     }
2912     /* Check sum(n) = Nbs */
2913     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2914     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2915 
2916     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2917     rstart -= mbs;
2918 
2919     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2920     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2921     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2922     for (i=0; i<mbs; i++) {
2923       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2924       nnz = nnz/bs;
2925       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2926       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2927       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2928     }
2929     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2930     ierr = PetscFree(bindx);CHKERRQ(ierr);
2931 
2932     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2933     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2934     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2935     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2936     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2937     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2938   }
2939 
2940   /* numeric phase */
2941   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2942   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2943 
2944   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2945   for (i=0; i<m; i++) {
2946     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2947     Ii   = i + rstart;
2948     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2949     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2950   }
2951   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2952   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956