xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 55d14e7db00c55374b7a73614b4a2091d5afcedc)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 #if defined(PETSC_HAVE_ELEMENTAL)
23 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
24 #endif
25 
26 #undef __FUNCT__
27 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
28 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
29 {
30   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
31   PetscErrorCode ierr;
32 
33   PetscFunctionBegin;
34   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
35   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
36   PetscFunctionReturn(0);
37 }
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
53   { \
54  \
55     brow = row/bs;  \
56     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
57     rmax = aimax[brow]; nrow = ailen[brow]; \
58     bcol = col/bs; \
59     ridx = row % bs; cidx = col % bs; \
60     low  = 0; high = nrow; \
61     while (high-low > 3) { \
62       t = (low+high)/2; \
63       if (rp[t] > bcol) high = t; \
64       else              low  = t; \
65     } \
66     for (_i=low; _i<high; _i++) { \
67       if (rp[_i] > bcol) break; \
68       if (rp[_i] == bcol) { \
69         bap = ap + bs2*_i + bs*cidx + ridx; \
70         if (addv == ADD_VALUES) *bap += value;  \
71         else                    *bap  = value;  \
72         goto a_noinsert; \
73       } \
74     } \
75     if (a->nonew == 1) goto a_noinsert; \
76     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
77     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
78     N = nrow++ - 1;  \
79     /* shift up all the later entries in this row */ \
80     for (ii=N; ii>=_i; ii--) { \
81       rp[ii+1] = rp[ii]; \
82       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
83     } \
84     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
85     rp[_i]                      = bcol;  \
86     ap[bs2*_i + bs*cidx + ridx] = value;  \
87     A->nonzerostate++;\
88 a_noinsert:; \
89     ailen[brow] = nrow; \
90   }
91 
92 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
93   { \
94     brow = row/bs;  \
95     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
96     rmax = bimax[brow]; nrow = bilen[brow]; \
97     bcol = col/bs; \
98     ridx = row % bs; cidx = col % bs; \
99     low  = 0; high = nrow; \
100     while (high-low > 3) { \
101       t = (low+high)/2; \
102       if (rp[t] > bcol) high = t; \
103       else              low  = t; \
104     } \
105     for (_i=low; _i<high; _i++) { \
106       if (rp[_i] > bcol) break; \
107       if (rp[_i] == bcol) { \
108         bap = ap + bs2*_i + bs*cidx + ridx; \
109         if (addv == ADD_VALUES) *bap += value;  \
110         else                    *bap  = value;  \
111         goto b_noinsert; \
112       } \
113     } \
114     if (b->nonew == 1) goto b_noinsert; \
115     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
116     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
117     N = nrow++ - 1;  \
118     /* shift up all the later entries in this row */ \
119     for (ii=N; ii>=_i; ii--) { \
120       rp[ii+1] = rp[ii]; \
121       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122     } \
123     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
124     rp[_i]                      = bcol;  \
125     ap[bs2*_i + bs*cidx + ridx] = value;  \
126     B->nonzerostate++;\
127 b_noinsert:; \
128     bilen[brow] = nrow; \
129   }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat          A     = baij->A;
149   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar    *aa   =a->a;
152 
153   Mat         B     = baij->B;
154   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar   *ba   =b->a;
157 
158   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar *ap,*bap;
161 
162   /* for stash */
163   PetscInt  n_loc, *in_loc = NULL;
164   MatScalar *v_loc = NULL;
165 
166   PetscFunctionBegin;
167   if (!baij->donotstash) {
168     if (n > baij->n_loc) {
169       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
171       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
173 
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
192         }
193         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
194           col  = in[j] - cstart_orig;         /* local col index */
195           brow = row/bs; bcol = col/bs;
196           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
197           if (roworiented) value = v[i*n+j];
198           else             value = v[i+j*m];
199           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
200           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
201         } else if (in[j] < 0) continue;
202 #if defined(PETSC_USE_DEBUG)
203         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
204 #endif
205         else {  /* off-diag entry (B) */
206           if (mat->was_assembled) {
207             if (!baij->colmap) {
208               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
209             }
210 #if defined(PETSC_USE_CTABLE)
211             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
212             col  = col - 1;
213 #else
214             col = baij->colmap[in[j]/bs] - 1;
215 #endif
216             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
217               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
218               col  =  in[j];
219               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
220               B    = baij->B;
221               b    = (Mat_SeqBAIJ*)(B)->data;
222               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
223               ba   = b->a;
224             } else col += in[j]%bs;
225           } else col = in[j];
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc          = 0;
237         for (j=0; j<n; j++) {
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray     =baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if ((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++ = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend) {
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         } else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined(PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined(PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col  = in[j];
341             }
342           } else col = in[j];
343           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
344         }
345       }
346     } else {
347       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend) {
379           col  = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined(PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data--;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
399   }
400   PetscFunctionReturn(0);
401 }
402 
403 #undef __FUNCT__
404 #define __FUNCT__ "MatNorm_MPISBAIJ"
405 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
406 {
407   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
408   PetscErrorCode ierr;
409   PetscReal      sum[2],*lnorm2;
410 
411   PetscFunctionBegin;
412   if (baij->size == 1) {
413     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
414   } else {
415     if (type == NORM_FROBENIUS) {
416       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
417       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
419       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
421       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
422       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
423       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
424     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
425       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
426       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
427       PetscReal    *rsum,*rsum2,vabs;
428       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
429       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
430       MatScalar    *v;
431 
432       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
433       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz   = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++) {
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++) {
442             for (row=0; row<bs; row++) {
443               vabs            = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++) {
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++) {
459             for (row=0; row<bs; row++) {
460               vabs            = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
473     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
474   }
475   PetscFunctionReturn(0);
476 }
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
480 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
481 {
482   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
483   PetscErrorCode ierr;
484   PetscInt       nstash,reallocs;
485   InsertMode     addv;
486 
487   PetscFunctionBegin;
488   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
489 
490   /* make sure all processors are either in INSERTMODE or ADDMODE */
491   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
492   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
493   mat->insertmode = addv; /* in case this processor had no cache */
494 
495   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
496   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
500   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
501   PetscFunctionReturn(0);
502 }
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
506 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
507 {
508   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
509   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
510   PetscErrorCode ierr;
511   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
512   PetscInt       *row,*col;
513   PetscBool      other_disassembled;
514   PetscMPIInt    n;
515   PetscBool      r1,r2,r3;
516   MatScalar      *val;
517   InsertMode     addv = mat->insertmode;
518 
519   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
520   PetscFunctionBegin;
521   if (!baij->donotstash &&  !mat->nooffprocentries) {
522     while (1) {
523       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
524       if (!flg) break;
525 
526       for (i=0; i<n;) {
527         /* Now identify the consecutive vals belonging to the same row */
528         for (j=i,rstart=row[j]; j<n; j++) {
529           if (row[j] != rstart) break;
530         }
531         if (j < n) ncols = j-i;
532         else       ncols = n-i;
533         /* Now assemble all these values with a single function call */
534         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
535         i    = j;
536       }
537     }
538     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
539     /* Now process the block-stash. Since the values are stashed column-oriented,
540        set the roworiented flag to column oriented, and after MatSetValues()
541        restore the original flags */
542     r1 = baij->roworiented;
543     r2 = a->roworiented;
544     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
545 
546     baij->roworiented = PETSC_FALSE;
547     a->roworiented    = PETSC_FALSE;
548 
549     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
550     while (1) {
551       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
552       if (!flg) break;
553 
554       for (i=0; i<n;) {
555         /* Now identify the consecutive vals belonging to the same row */
556         for (j=i,rstart=row[j]; j<n; j++) {
557           if (row[j] != rstart) break;
558         }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i    = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566 
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569 
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596 
597   baij->rowvalues = 0;
598 
599   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
600   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
601     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
602     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
608 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
609 #include <petscdraw.h>
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
612 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
613 {
614   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
615   PetscErrorCode    ierr;
616   PetscInt          bs   = mat->rmap->bs;
617   PetscMPIInt       rank = baij->rank;
618   PetscBool         iascii,isdraw;
619   PetscViewer       sviewer;
620   PetscViewerFormat format;
621 
622   PetscFunctionBegin;
623   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
624   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
625   if (iascii) {
626     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
627     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
628       MatInfo info;
629       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
630       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
633       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
635       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
636       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
637       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
638       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
639       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
640       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
644       PetscFunctionReturn(0);
645     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
646       PetscFunctionReturn(0);
647     }
648   }
649 
650   if (isdraw) {
651     PetscDraw draw;
652     PetscBool isnull;
653     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
654     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
655   }
656 
657   {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664     const char   *matname;
665 
666     /* Should this be the same type as mat? */
667     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
668     if (!rank) {
669       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
670     } else {
671       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
672     }
673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
674     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
675     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
676     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
677 
678     /* copy over the A part */
679     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
680     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
681     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
682 
683     for (i=0; i<mbs; i++) {
684       rvals[0] = bs*(baij->rstartbs + i);
685       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
686       for (j=ai[i]; j<ai[i+1]; j++) {
687         col = (baij->cstartbs+aj[j])*bs;
688         for (k=0; k<bs; k++) {
689           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
690           col++;
691           a += bs;
692         }
693       }
694     }
695     /* copy over the B part */
696     Bloc = (Mat_SeqBAIJ*)baij->B->data;
697     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
698     for (i=0; i<mbs; i++) {
699 
700       rvals[0] = bs*(baij->rstartbs + i);
701       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
702       for (j=ai[i]; j<ai[i+1]; j++) {
703         col = baij->garray[aj[j]]*bs;
704         for (k=0; k<bs; k++) {
705           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
706           col++;
707           a += bs;
708         }
709       }
710     }
711     ierr = PetscFree(rvals);CHKERRQ(ierr);
712     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
714     /*
715        Everyone has to call to draw the matrix since the graphics waits are
716        synchronized across all processors that share the PetscDraw object
717     */
718     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
720     if (!rank) {
721       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
722       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797 #if defined(PETSC_HAVE_ELEMENTAL)
798   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_elemental_C",NULL);CHKERRQ(ierr);
799 #endif
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
805 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
806 {
807   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
810   PetscScalar    *x,*from;
811 
812   PetscFunctionBegin;
813   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
814   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
815 
816   /* diagonal part */
817   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
818   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
819 
820   /* subdiagonal part */
821   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
822 
823   /* copy x into the vec slvec0 */
824   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
828   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
829   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
830 
831   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
833   /* supperdiagonal part */
834   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 #undef __FUNCT__
839 #define __FUNCT__ "MatMult_MPISBAIJ"
840 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
841 {
842   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
843   PetscErrorCode    ierr;
844   PetscInt          nt,mbs=a->mbs,bs=A->rmap->bs;
845   PetscScalar       *from;
846   const PetscScalar *x;
847 
848   PetscFunctionBegin;
849   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
850   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
851 
852   /* diagonal part */
853   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
854   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
855 
856   /* subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
858 
859   /* copy x into the vec slvec0 */
860   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
862 
863   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
864   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
865   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
866 
867   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   /* supperdiagonal part */
870   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
876 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
879   PetscErrorCode ierr;
880   PetscInt       nt;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885 
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
888 
889   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   /* do diagonal part */
891   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
892   /* do supperdiagonal part */
893   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
895   /* do subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
897   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
898   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
904 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
905 {
906   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode    ierr;
908   PetscInt          mbs=a->mbs,bs=A->rmap->bs;
909   PetscScalar       *from,zero=0.0;
910   const PetscScalar *x;
911 
912   PetscFunctionBegin;
913   /*
914   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
915   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
916   */
917   /* diagonal part */
918   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
919   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
920 
921   /* subdiagonal part */
922   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
923 
924   /* copy x into the vec slvec0 */
925   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
926   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
927   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
928   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
929 
930   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
931   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
932   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
933 
934   /* supperdiagonal part */
935   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
941 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
942 {
943   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
944   PetscErrorCode ierr;
945 
946   PetscFunctionBegin;
947   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   /* do diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
950   /* do supperdiagonal part */
951   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
952   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
953 
954   /* do subdiagonal part */
955   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
956   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 
961 /*
962   This only works correctly for square matrices where the subblock A->A is the
963    diagonal block
964 */
965 #undef __FUNCT__
966 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
967 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
974   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatScale_MPISBAIJ"
980 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
981 {
982   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
987   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatGetRow_MPISBAIJ"
993 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
994 {
995   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
996   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
997   PetscErrorCode ierr;
998   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
999   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1000   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1001 
1002   PetscFunctionBegin;
1003   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1004   mat->getrowactive = PETSC_TRUE;
1005 
1006   if (!mat->rowvalues && (idx || v)) {
1007     /*
1008         allocate enough space to hold information from the longest row.
1009     */
1010     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1011     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1012     PetscInt     max = 1,mbs = mat->mbs,tmp;
1013     for (i=0; i<mbs; i++) {
1014       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1015       if (max < tmp) max = tmp;
1016     }
1017     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1018   }
1019 
1020   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1021   lrow = row - brstart;  /* local row index */
1022 
1023   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1024   if (!v)   {pvA = 0; pvB = 0;}
1025   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1026   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1027   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1028   nztot = nzA + nzB;
1029 
1030   cmap = mat->garray;
1031   if (v  || idx) {
1032     if (nztot) {
1033       /* Sort by increasing column numbers, assuming A and B already sorted */
1034       PetscInt imark = -1;
1035       if (v) {
1036         *v = v_p = mat->rowvalues;
1037         for (i=0; i<nzB; i++) {
1038           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1039           else break;
1040         }
1041         imark = i;
1042         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1043         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1044       }
1045       if (idx) {
1046         *idx = idx_p = mat->rowindices;
1047         if (imark > -1) {
1048           for (i=0; i<imark; i++) {
1049             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1050           }
1051         } else {
1052           for (i=0; i<nzB; i++) {
1053             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054             else break;
1055           }
1056           imark = i;
1057         }
1058         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1059         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1060       }
1061     } else {
1062       if (idx) *idx = 0;
1063       if (v)   *v   = 0;
1064     }
1065   }
1066   *nz  = nztot;
1067   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1068   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1080   baij->getrowactive = PETSC_FALSE;
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1086 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1089   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1090 
1091   PetscFunctionBegin;
1092   aA->getrow_utriangular = PETSC_TRUE;
1093   PetscFunctionReturn(0);
1094 }
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1097 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1098 {
1099   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1100   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1101 
1102   PetscFunctionBegin;
1103   aA->getrow_utriangular = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1109 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1116   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1122 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1129   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 /* Check if isrow is a subset of iscol_local, called by MatGetSubMatrix_MPISBAIJ()
1134    Input: isrow       - distributed(parallel),
1135           iscol_local - locally owned (seq)
1136 */
1137 #undef __FUNCT__
1138 #define __FUNCT__ "ISEqual_private"
1139 PetscErrorCode ISEqual_private(IS isrow,IS iscol_local,PetscBool  *flg)
1140 {
1141   PetscErrorCode ierr;
1142   PetscInt       sz1,sz2,*a1,*a2,i,j,k,nmatch;
1143   const PetscInt *ptr1,*ptr2;
1144 
1145   PetscFunctionBegin;
1146   ierr = ISGetLocalSize(isrow,&sz1);CHKERRQ(ierr);
1147   ierr = ISGetLocalSize(iscol_local,&sz2);CHKERRQ(ierr);
1148   if (sz1 > sz2) *flg = PETSC_FALSE;
1149 
1150   ierr = ISGetIndices(isrow,&ptr1);CHKERRQ(ierr);
1151   ierr = ISGetIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1152 
1153   ierr = PetscMalloc1(sz1,&a1);CHKERRQ(ierr);
1154   ierr = PetscMalloc1(sz2,&a2);CHKERRQ(ierr);
1155   ierr = PetscMemcpy(a1,ptr1,sz1*sizeof(PetscInt));CHKERRQ(ierr);
1156   ierr = PetscMemcpy(a2,ptr2,sz2*sizeof(PetscInt));CHKERRQ(ierr);
1157   ierr = PetscSortInt(sz1,a1);CHKERRQ(ierr);
1158   ierr = PetscSortInt(sz2,a2);CHKERRQ(ierr);
1159 
1160   nmatch=0;
1161   k     = 0;
1162   for (i=0; i<sz1; i++){
1163     for (j=k; j<sz2; j++){
1164       if (a1[i] == a2[j]) {
1165         k = j; nmatch++;
1166         break;
1167       }
1168     }
1169   }
1170   ierr = ISRestoreIndices(isrow,&ptr1);CHKERRQ(ierr);
1171   ierr = ISRestoreIndices(iscol_local,&ptr2);CHKERRQ(ierr);
1172   ierr = PetscFree(a1);CHKERRQ(ierr);
1173   ierr = PetscFree(a2);CHKERRQ(ierr);
1174   if (nmatch < sz1) *flg = PETSC_FALSE;
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatGetSubMatrix_MPISBAIJ"
1180 PetscErrorCode MatGetSubMatrix_MPISBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1181 {
1182   PetscErrorCode ierr;
1183   IS             iscol_local;
1184   PetscInt       csize;
1185   PetscBool      isequal;
1186 
1187   PetscFunctionBegin;
1188   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1189   if (call == MAT_REUSE_MATRIX) {
1190     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1191     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1192   } else {
1193     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1194     ierr = ISEqual_private(isrow,iscol_local,&isequal);CHKERRQ(ierr);
1195     if (!isequal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"For symmetric format, iscol must equal isrow");
1196   }
1197 
1198   /* now call MatGetSubMatrix_MPIBAIJ() */
1199   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1200   if (call == MAT_INITIAL_MATRIX) {
1201     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1202     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1209 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1210 {
1211   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1216   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1222 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1223 {
1224   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1225   Mat            A  = a->A,B = a->B;
1226   PetscErrorCode ierr;
1227   PetscReal      isend[5],irecv[5];
1228 
1229   PetscFunctionBegin;
1230   info->block_size = (PetscReal)matin->rmap->bs;
1231 
1232   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1233 
1234   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1235   isend[3] = info->memory;  isend[4] = info->mallocs;
1236 
1237   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1238 
1239   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1240   isend[3] += info->memory;  isend[4] += info->mallocs;
1241   if (flag == MAT_LOCAL) {
1242     info->nz_used      = isend[0];
1243     info->nz_allocated = isend[1];
1244     info->nz_unneeded  = isend[2];
1245     info->memory       = isend[3];
1246     info->mallocs      = isend[4];
1247   } else if (flag == MAT_GLOBAL_MAX) {
1248     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1249 
1250     info->nz_used      = irecv[0];
1251     info->nz_allocated = irecv[1];
1252     info->nz_unneeded  = irecv[2];
1253     info->memory       = irecv[3];
1254     info->mallocs      = irecv[4];
1255   } else if (flag == MAT_GLOBAL_SUM) {
1256     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1257 
1258     info->nz_used      = irecv[0];
1259     info->nz_allocated = irecv[1];
1260     info->nz_unneeded  = irecv[2];
1261     info->memory       = irecv[3];
1262     info->mallocs      = irecv[4];
1263   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1264   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1265   info->fill_ratio_needed = 0;
1266   info->factor_mallocs    = 0;
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1272 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1273 {
1274   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1275   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1276   PetscErrorCode ierr;
1277 
1278   PetscFunctionBegin;
1279   switch (op) {
1280   case MAT_NEW_NONZERO_LOCATIONS:
1281   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1282   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1283   case MAT_KEEP_NONZERO_PATTERN:
1284   case MAT_NEW_NONZERO_LOCATION_ERR:
1285     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1286     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1287     break;
1288   case MAT_ROW_ORIENTED:
1289     a->roworiented = flg;
1290 
1291     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1292     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1293     break;
1294   case MAT_NEW_DIAGONALS:
1295     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1296     break;
1297   case MAT_IGNORE_OFF_PROC_ENTRIES:
1298     a->donotstash = flg;
1299     break;
1300   case MAT_USE_HASH_TABLE:
1301     a->ht_flag = flg;
1302     break;
1303   case MAT_HERMITIAN:
1304     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1305     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1306 
1307     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1308     break;
1309   case MAT_SPD:
1310     A->spd_set = PETSC_TRUE;
1311     A->spd     = flg;
1312     if (flg) {
1313       A->symmetric                  = PETSC_TRUE;
1314       A->structurally_symmetric     = PETSC_TRUE;
1315       A->symmetric_set              = PETSC_TRUE;
1316       A->structurally_symmetric_set = PETSC_TRUE;
1317     }
1318     break;
1319   case MAT_SYMMETRIC:
1320     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1321     break;
1322   case MAT_STRUCTURALLY_SYMMETRIC:
1323     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1324     break;
1325   case MAT_SYMMETRY_ETERNAL:
1326     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1327     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1328     break;
1329   case MAT_IGNORE_LOWER_TRIANGULAR:
1330     aA->ignore_ltriangular = flg;
1331     break;
1332   case MAT_ERROR_LOWER_TRIANGULAR:
1333     aA->ignore_ltriangular = flg;
1334     break;
1335   case MAT_GETROW_UPPERTRIANGULAR:
1336     aA->getrow_utriangular = flg;
1337     break;
1338   default:
1339     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1346 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1347 {
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   if (MAT_INITIAL_MATRIX || *B != A) {
1352     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1359 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1360 {
1361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1362   Mat            a     = baij->A, b=baij->B;
1363   PetscErrorCode ierr;
1364   PetscInt       nv,m,n;
1365   PetscBool      flg;
1366 
1367   PetscFunctionBegin;
1368   if (ll != rr) {
1369     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1370     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1371   }
1372   if (!ll) PetscFunctionReturn(0);
1373 
1374   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1375   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1376 
1377   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1378   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1379 
1380   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1381 
1382   /* left diagonalscale the off-diagonal part */
1383   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1384 
1385   /* scale the diagonal part */
1386   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1387 
1388   /* right diagonalscale the off-diagonal part */
1389   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1390   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1396 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1397 {
1398   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatEqual_MPISBAIJ"
1410 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1411 {
1412   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1413   Mat            a,b,c,d;
1414   PetscBool      flg;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   a = matA->A; b = matA->B;
1419   c = matB->A; d = matB->B;
1420 
1421   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1422   if (flg) {
1423     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1424   }
1425   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatCopy_MPISBAIJ"
1431 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1432 {
1433   PetscErrorCode ierr;
1434   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1435   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1436 
1437   PetscFunctionBegin;
1438   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1439   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1440     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1441     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1442     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1443   } else {
1444     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1445     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1452 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1453 {
1454   PetscErrorCode ierr;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1458   PetscFunctionReturn(0);
1459 }
1460 
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1463 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1464 {
1465   PetscErrorCode ierr;
1466   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1467   PetscBLASInt   bnz,one=1;
1468   Mat_SeqSBAIJ   *xa,*ya;
1469   Mat_SeqBAIJ    *xb,*yb;
1470 
1471   PetscFunctionBegin;
1472   if (str == SAME_NONZERO_PATTERN) {
1473     PetscScalar alpha = a;
1474     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1475     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1476     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1477     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1478     xb   = (Mat_SeqBAIJ*)xx->B->data;
1479     yb   = (Mat_SeqBAIJ*)yy->B->data;
1480     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1481     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1482     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1483   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1484     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1485     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1486     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1487   } else {
1488     Mat      B;
1489     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1490     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1491     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1492     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1493     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1494     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1495     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1496     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1497     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1498     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1499     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1500     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1501     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1502     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1503     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1504     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1505     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1506     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1507     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1508     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1515 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1516 {
1517   PetscErrorCode ierr;
1518   PetscInt       i;
1519   PetscBool      flg;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1523   for (i=0; i<n; i++) {
1524     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1525     if (!flg) { /* *B[i] is non-symmetric, set flag */
1526       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1527     }
1528   }
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 /* -------------------------------------------------------------------*/
1533 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1534                                        MatGetRow_MPISBAIJ,
1535                                        MatRestoreRow_MPISBAIJ,
1536                                        MatMult_MPISBAIJ,
1537                                /*  4*/ MatMultAdd_MPISBAIJ,
1538                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1539                                        MatMultAdd_MPISBAIJ,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                /* 10*/ 0,
1544                                        0,
1545                                        0,
1546                                        MatSOR_MPISBAIJ,
1547                                        MatTranspose_MPISBAIJ,
1548                                /* 15*/ MatGetInfo_MPISBAIJ,
1549                                        MatEqual_MPISBAIJ,
1550                                        MatGetDiagonal_MPISBAIJ,
1551                                        MatDiagonalScale_MPISBAIJ,
1552                                        MatNorm_MPISBAIJ,
1553                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1554                                        MatAssemblyEnd_MPISBAIJ,
1555                                        MatSetOption_MPISBAIJ,
1556                                        MatZeroEntries_MPISBAIJ,
1557                                /* 24*/ 0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                /* 29*/ MatSetUp_MPISBAIJ,
1563                                        0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                /* 34*/ MatDuplicate_MPISBAIJ,
1568                                        0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                /* 39*/ MatAXPY_MPISBAIJ,
1573                                        MatGetSubMatrices_MPISBAIJ,
1574                                        MatIncreaseOverlap_MPISBAIJ,
1575                                        MatGetValues_MPISBAIJ,
1576                                        MatCopy_MPISBAIJ,
1577                                /* 44*/ 0,
1578                                        MatScale_MPISBAIJ,
1579                                        0,
1580                                        0,
1581                                        0,
1582                                /* 49*/ 0,
1583                                        0,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                /* 54*/ 0,
1588                                        0,
1589                                        MatSetUnfactored_MPISBAIJ,
1590                                        0,
1591                                        MatSetValuesBlocked_MPISBAIJ,
1592                                /* 59*/ MatGetSubMatrix_MPISBAIJ,
1593                                        0,
1594                                        0,
1595                                        0,
1596                                        0,
1597                                /* 64*/ 0,
1598                                        0,
1599                                        0,
1600                                        0,
1601                                        0,
1602                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1603                                        0,
1604                                        0,
1605                                        0,
1606                                        0,
1607                                /* 74*/ 0,
1608                                        0,
1609                                        0,
1610                                        0,
1611                                        0,
1612                                /* 79*/ 0,
1613                                        0,
1614                                        0,
1615                                        0,
1616                                        MatLoad_MPISBAIJ,
1617                                /* 84*/ 0,
1618                                        0,
1619                                        0,
1620                                        0,
1621                                        0,
1622                                /* 89*/ 0,
1623                                        0,
1624                                        0,
1625                                        0,
1626                                        0,
1627                                /* 94*/ 0,
1628                                        0,
1629                                        0,
1630                                        0,
1631                                        0,
1632                                /* 99*/ 0,
1633                                        0,
1634                                        0,
1635                                        0,
1636                                        0,
1637                                /*104*/ 0,
1638                                        MatRealPart_MPISBAIJ,
1639                                        MatImaginaryPart_MPISBAIJ,
1640                                        MatGetRowUpperTriangular_MPISBAIJ,
1641                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1642                                /*109*/ 0,
1643                                        0,
1644                                        0,
1645                                        0,
1646                                        0,
1647                                /*114*/ 0,
1648                                        0,
1649                                        0,
1650                                        0,
1651                                        0,
1652                                /*119*/ 0,
1653                                        0,
1654                                        0,
1655                                        0,
1656                                        0,
1657                                /*124*/ 0,
1658                                        0,
1659                                        0,
1660                                        0,
1661                                        0,
1662                                /*129*/ 0,
1663                                        0,
1664                                        0,
1665                                        0,
1666                                        0,
1667                                /*134*/ 0,
1668                                        0,
1669                                        0,
1670                                        0,
1671                                        0,
1672                                /*139*/ 0,
1673                                        0,
1674                                        0,
1675                                        0,
1676                                        0,
1677                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1678 };
1679 
1680 #undef __FUNCT__
1681 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1682 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1683 {
1684   PetscFunctionBegin;
1685   *a = ((Mat_MPISBAIJ*)A->data)->A;
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1691 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1692 {
1693   Mat_MPISBAIJ   *b;
1694   PetscErrorCode ierr;
1695   PetscInt       i,mbs,Mbs;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1699   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1700   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1701   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1702 
1703   b   = (Mat_MPISBAIJ*)B->data;
1704   mbs = B->rmap->n/bs;
1705   Mbs = B->rmap->N/bs;
1706   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1707 
1708   B->rmap->bs = bs;
1709   b->bs2      = bs*bs;
1710   b->mbs      = mbs;
1711   b->Mbs      = Mbs;
1712   b->nbs      = B->cmap->n/bs;
1713   b->Nbs      = B->cmap->N/bs;
1714 
1715   for (i=0; i<=b->size; i++) {
1716     b->rangebs[i] = B->rmap->range[i]/bs;
1717   }
1718   b->rstartbs = B->rmap->rstart/bs;
1719   b->rendbs   = B->rmap->rend/bs;
1720 
1721   b->cstartbs = B->cmap->rstart/bs;
1722   b->cendbs   = B->cmap->rend/bs;
1723 
1724   if (!B->preallocated) {
1725     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1726     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1727     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1728     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1729     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1730     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1731     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1732     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1733     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1734   }
1735 
1736   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1737   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1738 
1739   B->preallocated = PETSC_TRUE;
1740   PetscFunctionReturn(0);
1741 }
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1745 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1746 {
1747   PetscInt       m,rstart,cstart,cend;
1748   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1749   const PetscInt *JJ    =0;
1750   PetscScalar    *values=0;
1751   PetscErrorCode ierr;
1752 
1753   PetscFunctionBegin;
1754   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1755   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1756   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1757   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1758   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1759   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1760   m      = B->rmap->n/bs;
1761   rstart = B->rmap->rstart/bs;
1762   cstart = B->cmap->rstart/bs;
1763   cend   = B->cmap->rend/bs;
1764 
1765   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1766   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1767   for (i=0; i<m; i++) {
1768     nz = ii[i+1] - ii[i];
1769     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1770     nz_max = PetscMax(nz_max,nz);
1771     JJ     = jj + ii[i];
1772     for (j=0; j<nz; j++) {
1773       if (*JJ >= cstart) break;
1774       JJ++;
1775     }
1776     d = 0;
1777     for (; j<nz; j++) {
1778       if (*JJ++ >= cend) break;
1779       d++;
1780     }
1781     d_nnz[i] = d;
1782     o_nnz[i] = nz - d;
1783   }
1784   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1785   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1786 
1787   values = (PetscScalar*)V;
1788   if (!values) {
1789     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1790     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1791   }
1792   for (i=0; i<m; i++) {
1793     PetscInt          row    = i + rstart;
1794     PetscInt          ncols  = ii[i+1] - ii[i];
1795     const PetscInt    *icols = jj + ii[i];
1796     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1797     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1798   }
1799 
1800   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1801   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1802   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1803   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 /*MC
1808    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1809    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1810    the matrix is stored.
1811 
1812   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1813   can call MatSetOption(Mat, MAT_HERMITIAN);
1814 
1815    Options Database Keys:
1816 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1817 
1818   Level: beginner
1819 
1820 .seealso: MatCreateMPISBAIJ
1821 M*/
1822 
1823 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatCreate_MPISBAIJ"
1827 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1828 {
1829   Mat_MPISBAIJ   *b;
1830   PetscErrorCode ierr;
1831   PetscBool      flg = PETSC_FALSE;
1832 
1833   PetscFunctionBegin;
1834   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1835   B->data = (void*)b;
1836   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1837 
1838   B->ops->destroy = MatDestroy_MPISBAIJ;
1839   B->ops->view    = MatView_MPISBAIJ;
1840   B->assembled    = PETSC_FALSE;
1841   B->insertmode   = NOT_SET_VALUES;
1842 
1843   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1844   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1845 
1846   /* build local table of row and column ownerships */
1847   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1848 
1849   /* build cache for off array entries formed */
1850   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1851 
1852   b->donotstash  = PETSC_FALSE;
1853   b->colmap      = NULL;
1854   b->garray      = NULL;
1855   b->roworiented = PETSC_TRUE;
1856 
1857   /* stuff used in block assembly */
1858   b->barray = 0;
1859 
1860   /* stuff used for matrix vector multiply */
1861   b->lvec    = 0;
1862   b->Mvctx   = 0;
1863   b->slvec0  = 0;
1864   b->slvec0b = 0;
1865   b->slvec1  = 0;
1866   b->slvec1a = 0;
1867   b->slvec1b = 0;
1868   b->sMvctx  = 0;
1869 
1870   /* stuff for MatGetRow() */
1871   b->rowindices   = 0;
1872   b->rowvalues    = 0;
1873   b->getrowactive = PETSC_FALSE;
1874 
1875   /* hash table stuff */
1876   b->ht           = 0;
1877   b->hd           = 0;
1878   b->ht_size      = 0;
1879   b->ht_flag      = PETSC_FALSE;
1880   b->ht_fact      = 0;
1881   b->ht_total_ct  = 0;
1882   b->ht_insert_ct = 0;
1883 
1884   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1885   b->ijonly = PETSC_FALSE;
1886 
1887   b->in_loc = 0;
1888   b->v_loc  = 0;
1889   b->n_loc  = 0;
1890 
1891   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1892   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1893   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1894   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1895   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1896   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1897 #if defined(PETSC_HAVE_ELEMENTAL)
1898   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_elemental_C",MatConvert_MPISBAIJ_Elemental);CHKERRQ(ierr);
1899 #endif
1900 
1901   B->symmetric                  = PETSC_TRUE;
1902   B->structurally_symmetric     = PETSC_TRUE;
1903   B->symmetric_set              = PETSC_TRUE;
1904   B->structurally_symmetric_set = PETSC_TRUE;
1905 
1906   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1907   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1908   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1909   if (flg) {
1910     PetscReal fact = 1.39;
1911     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1912     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1913     if (fact <= 1.0) fact = 1.39;
1914     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1915     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1916   }
1917   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 /*MC
1922    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1923 
1924    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1925    and MATMPISBAIJ otherwise.
1926 
1927    Options Database Keys:
1928 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1929 
1930   Level: beginner
1931 
1932 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1933 M*/
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1937 /*@C
1938    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1939    the user should preallocate the matrix storage by setting the parameters
1940    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1941    performance can be increased by more than a factor of 50.
1942 
1943    Collective on Mat
1944 
1945    Input Parameters:
1946 +  B - the matrix
1947 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1948           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1949 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1950            submatrix  (same for all local rows)
1951 .  d_nnz - array containing the number of block nonzeros in the various block rows
1952            in the upper triangular and diagonal part of the in diagonal portion of the local
1953            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1954            for the diagonal entry and set a value even if it is zero.
1955 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1956            submatrix (same for all local rows).
1957 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1958            off-diagonal portion of the local submatrix that is right of the diagonal
1959            (possibly different for each block row) or NULL.
1960 
1961 
1962    Options Database Keys:
1963 .   -mat_no_unroll - uses code that does not unroll the loops in the
1964                      block calculations (much slower)
1965 .   -mat_block_size - size of the blocks to use
1966 
1967    Notes:
1968 
1969    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1970    than it must be used on all processors that share the object for that argument.
1971 
1972    If the *_nnz parameter is given then the *_nz parameter is ignored
1973 
1974    Storage Information:
1975    For a square global matrix we define each processor's diagonal portion
1976    to be its local rows and the corresponding columns (a square submatrix);
1977    each processor's off-diagonal portion encompasses the remainder of the
1978    local matrix (a rectangular submatrix).
1979 
1980    The user can specify preallocated storage for the diagonal part of
1981    the local submatrix with either d_nz or d_nnz (not both).  Set
1982    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1983    memory allocation.  Likewise, specify preallocated storage for the
1984    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1985 
1986    You can call MatGetInfo() to get information on how effective the preallocation was;
1987    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1988    You can also run with the option -info and look for messages with the string
1989    malloc in them to see if additional memory allocation was needed.
1990 
1991    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1992    the figure below we depict these three local rows and all columns (0-11).
1993 
1994 .vb
1995            0 1 2 3 4 5 6 7 8 9 10 11
1996           --------------------------
1997    row 3  |. . . d d d o o o o  o  o
1998    row 4  |. . . d d d o o o o  o  o
1999    row 5  |. . . d d d o o o o  o  o
2000           --------------------------
2001 .ve
2002 
2003    Thus, any entries in the d locations are stored in the d (diagonal)
2004    submatrix, and any entries in the o locations are stored in the
2005    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2006    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2007 
2008    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2009    plus the diagonal part of the d matrix,
2010    and o_nz should indicate the number of block nonzeros per row in the o matrix
2011 
2012    In general, for PDE problems in which most nonzeros are near the diagonal,
2013    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2014    or you will get TERRIBLE performance; see the users' manual chapter on
2015    matrices.
2016 
2017    Level: intermediate
2018 
2019 .keywords: matrix, block, aij, compressed row, sparse, parallel
2020 
2021 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
2022 @*/
2023 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2024 {
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2029   PetscValidType(B,1);
2030   PetscValidLogicalCollectiveInt(B,bs,2);
2031   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatCreateSBAIJ"
2037 /*@C
2038    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2039    (block compressed row).  For good matrix assembly performance
2040    the user should preallocate the matrix storage by setting the parameters
2041    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2042    performance can be increased by more than a factor of 50.
2043 
2044    Collective on MPI_Comm
2045 
2046    Input Parameters:
2047 +  comm - MPI communicator
2048 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
2049           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
2050 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2051            This value should be the same as the local size used in creating the
2052            y vector for the matrix-vector product y = Ax.
2053 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2054            This value should be the same as the local size used in creating the
2055            x vector for the matrix-vector product y = Ax.
2056 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2057 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2058 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2059            submatrix  (same for all local rows)
2060 .  d_nnz - array containing the number of block nonzeros in the various block rows
2061            in the upper triangular portion of the in diagonal portion of the local
2062            (possibly different for each block block row) or NULL.
2063            If you plan to factor the matrix you must leave room for the diagonal entry and
2064            set its value even if it is zero.
2065 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2066            submatrix (same for all local rows).
2067 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2068            off-diagonal portion of the local submatrix (possibly different for
2069            each block row) or NULL.
2070 
2071    Output Parameter:
2072 .  A - the matrix
2073 
2074    Options Database Keys:
2075 .   -mat_no_unroll - uses code that does not unroll the loops in the
2076                      block calculations (much slower)
2077 .   -mat_block_size - size of the blocks to use
2078 .   -mat_mpi - use the parallel matrix data structures even on one processor
2079                (defaults to using SeqBAIJ format on one processor)
2080 
2081    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2082    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2083    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2084 
2085    Notes:
2086    The number of rows and columns must be divisible by blocksize.
2087    This matrix type does not support complex Hermitian operation.
2088 
2089    The user MUST specify either the local or global matrix dimensions
2090    (possibly both).
2091 
2092    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2093    than it must be used on all processors that share the object for that argument.
2094 
2095    If the *_nnz parameter is given then the *_nz parameter is ignored
2096 
2097    Storage Information:
2098    For a square global matrix we define each processor's diagonal portion
2099    to be its local rows and the corresponding columns (a square submatrix);
2100    each processor's off-diagonal portion encompasses the remainder of the
2101    local matrix (a rectangular submatrix).
2102 
2103    The user can specify preallocated storage for the diagonal part of
2104    the local submatrix with either d_nz or d_nnz (not both).  Set
2105    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2106    memory allocation.  Likewise, specify preallocated storage for the
2107    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2108 
2109    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2110    the figure below we depict these three local rows and all columns (0-11).
2111 
2112 .vb
2113            0 1 2 3 4 5 6 7 8 9 10 11
2114           --------------------------
2115    row 3  |. . . d d d o o o o  o  o
2116    row 4  |. . . d d d o o o o  o  o
2117    row 5  |. . . d d d o o o o  o  o
2118           --------------------------
2119 .ve
2120 
2121    Thus, any entries in the d locations are stored in the d (diagonal)
2122    submatrix, and any entries in the o locations are stored in the
2123    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2124    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2125 
2126    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2127    plus the diagonal part of the d matrix,
2128    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2129    In general, for PDE problems in which most nonzeros are near the diagonal,
2130    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2131    or you will get TERRIBLE performance; see the users' manual chapter on
2132    matrices.
2133 
2134    Level: intermediate
2135 
2136 .keywords: matrix, block, aij, compressed row, sparse, parallel
2137 
2138 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2139 @*/
2140 
2141 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2142 {
2143   PetscErrorCode ierr;
2144   PetscMPIInt    size;
2145 
2146   PetscFunctionBegin;
2147   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2148   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2149   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2150   if (size > 1) {
2151     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2152     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2153   } else {
2154     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2155     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2156   }
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2163 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2164 {
2165   Mat            mat;
2166   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2167   PetscErrorCode ierr;
2168   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2169   PetscScalar    *array;
2170 
2171   PetscFunctionBegin;
2172   *newmat = 0;
2173 
2174   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2175   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2176   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2177   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2178   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2179   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2180 
2181   mat->factortype   = matin->factortype;
2182   mat->preallocated = PETSC_TRUE;
2183   mat->assembled    = PETSC_TRUE;
2184   mat->insertmode   = NOT_SET_VALUES;
2185 
2186   a      = (Mat_MPISBAIJ*)mat->data;
2187   a->bs2 = oldmat->bs2;
2188   a->mbs = oldmat->mbs;
2189   a->nbs = oldmat->nbs;
2190   a->Mbs = oldmat->Mbs;
2191   a->Nbs = oldmat->Nbs;
2192 
2193 
2194   a->size         = oldmat->size;
2195   a->rank         = oldmat->rank;
2196   a->donotstash   = oldmat->donotstash;
2197   a->roworiented  = oldmat->roworiented;
2198   a->rowindices   = 0;
2199   a->rowvalues    = 0;
2200   a->getrowactive = PETSC_FALSE;
2201   a->barray       = 0;
2202   a->rstartbs     = oldmat->rstartbs;
2203   a->rendbs       = oldmat->rendbs;
2204   a->cstartbs     = oldmat->cstartbs;
2205   a->cendbs       = oldmat->cendbs;
2206 
2207   /* hash table stuff */
2208   a->ht           = 0;
2209   a->hd           = 0;
2210   a->ht_size      = 0;
2211   a->ht_flag      = oldmat->ht_flag;
2212   a->ht_fact      = oldmat->ht_fact;
2213   a->ht_total_ct  = 0;
2214   a->ht_insert_ct = 0;
2215 
2216   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2217   if (oldmat->colmap) {
2218 #if defined(PETSC_USE_CTABLE)
2219     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2220 #else
2221     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2222     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2223     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2224 #endif
2225   } else a->colmap = 0;
2226 
2227   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2228     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2229     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2230     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2231   } else a->garray = 0;
2232 
2233   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2234   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2235   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2236   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2237   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2238 
2239   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2241   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2242   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2243 
2244   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2245   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2246   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2247   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2248   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2249   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2250   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2251   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2252   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2253   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2254   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2255   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2256   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2257 
2258   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2259   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2260   a->sMvctx = oldmat->sMvctx;
2261   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2262 
2263   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2264   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2265   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2266   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2267   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2268   *newmat = mat;
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatLoad_MPISBAIJ"
2274 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2275 {
2276   PetscErrorCode ierr;
2277   PetscInt       i,nz,j,rstart,rend;
2278   PetscScalar    *vals,*buf;
2279   MPI_Comm       comm;
2280   MPI_Status     status;
2281   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2282   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2283   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2284   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2285   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2286   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2287   int            fd;
2288 
2289   PetscFunctionBegin;
2290   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2291   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2292   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2293   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2294   if (bs < 0) bs = 1;
2295 
2296   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2297   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2298   if (!rank) {
2299     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2300     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2301     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2302     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2303   }
2304 
2305   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2306 
2307   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2308   M    = header[1];
2309   N    = header[2];
2310 
2311   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2312   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2313   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2314 
2315   /* If global sizes are set, check if they are consistent with that given in the file */
2316   if (sizesset) {
2317     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2318   }
2319   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2320   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2321 
2322   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2323 
2324   /*
2325      This code adds extra rows to make sure the number of rows is
2326      divisible by the blocksize
2327   */
2328   Mbs        = M/bs;
2329   extra_rows = bs - M + bs*(Mbs);
2330   if (extra_rows == bs) extra_rows = 0;
2331   else                  Mbs++;
2332   if (extra_rows &&!rank) {
2333     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2334   }
2335 
2336   /* determine ownership of all rows */
2337   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2338     mbs = Mbs/size + ((Mbs % size) > rank);
2339     m   = mbs*bs;
2340   } else { /* User Set */
2341     m   = newmat->rmap->n;
2342     mbs = m/bs;
2343   }
2344   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2345   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2346   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2347   rowners[0] = 0;
2348   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2349   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2350   rstart = rowners[rank];
2351   rend   = rowners[rank+1];
2352 
2353   /* distribute row lengths to all processors */
2354   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2355   if (!rank) {
2356     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2357     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2358     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2359     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2360     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2361     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2362     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2363   } else {
2364     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2365   }
2366 
2367   if (!rank) {   /* procs[0] */
2368     /* calculate the number of nonzeros on each processor */
2369     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2370     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2371     for (i=0; i<size; i++) {
2372       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2373         procsnz[i] += rowlengths[j];
2374       }
2375     }
2376     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2377 
2378     /* determine max buffer needed and allocate it */
2379     maxnz = 0;
2380     for (i=0; i<size; i++) {
2381       maxnz = PetscMax(maxnz,procsnz[i]);
2382     }
2383     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2384 
2385     /* read in my part of the matrix column indices  */
2386     nz     = procsnz[0];
2387     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2388     mycols = ibuf;
2389     if (size == 1) nz -= extra_rows;
2390     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2391     if (size == 1) {
2392       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2393     }
2394 
2395     /* read in every ones (except the last) and ship off */
2396     for (i=1; i<size-1; i++) {
2397       nz   = procsnz[i];
2398       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2399       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2400     }
2401     /* read in the stuff for the last proc */
2402     if (size != 1) {
2403       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2404       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2405       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2406       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2407     }
2408     ierr = PetscFree(cols);CHKERRQ(ierr);
2409   } else {  /* procs[i], i>0 */
2410     /* determine buffer space needed for message */
2411     nz = 0;
2412     for (i=0; i<m; i++) nz += locrowlens[i];
2413     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2414     mycols = ibuf;
2415     /* receive message of column indices*/
2416     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2417     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2418     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2419   }
2420 
2421   /* loop over local rows, determining number of off diagonal entries */
2422   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2423   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2424   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2425   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2426   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2427   rowcount = 0;
2428   nzcount  = 0;
2429   for (i=0; i<mbs; i++) {
2430     dcount  = 0;
2431     odcount = 0;
2432     for (j=0; j<bs; j++) {
2433       kmax = locrowlens[rowcount];
2434       for (k=0; k<kmax; k++) {
2435         tmp = mycols[nzcount++]/bs; /* block col. index */
2436         if (!mask[tmp]) {
2437           mask[tmp] = 1;
2438           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2439           else masked1[dcount++] = tmp; /* entry in diag portion */
2440         }
2441       }
2442       rowcount++;
2443     }
2444 
2445     dlens[i]  = dcount;  /* d_nzz[i] */
2446     odlens[i] = odcount; /* o_nzz[i] */
2447 
2448     /* zero out the mask elements we set */
2449     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2450     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2451   }
2452   if (!sizesset) {
2453     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2454   }
2455   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2456   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2457 
2458   if (!rank) {
2459     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2460     /* read in my part of the matrix numerical values  */
2461     nz     = procsnz[0];
2462     vals   = buf;
2463     mycols = ibuf;
2464     if (size == 1) nz -= extra_rows;
2465     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2466     if (size == 1) {
2467       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2468     }
2469 
2470     /* insert into matrix */
2471     jj = rstart*bs;
2472     for (i=0; i<m; i++) {
2473       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2474       mycols += locrowlens[i];
2475       vals   += locrowlens[i];
2476       jj++;
2477     }
2478 
2479     /* read in other processors (except the last one) and ship out */
2480     for (i=1; i<size-1; i++) {
2481       nz   = procsnz[i];
2482       vals = buf;
2483       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2484       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2485     }
2486     /* the last proc */
2487     if (size != 1) {
2488       nz   = procsnz[i] - extra_rows;
2489       vals = buf;
2490       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2491       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2492       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2493     }
2494     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2495 
2496   } else {
2497     /* receive numeric values */
2498     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2499 
2500     /* receive message of values*/
2501     vals   = buf;
2502     mycols = ibuf;
2503     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2504     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2505     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2506 
2507     /* insert into matrix */
2508     jj = rstart*bs;
2509     for (i=0; i<m; i++) {
2510       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2511       mycols += locrowlens[i];
2512       vals   += locrowlens[i];
2513       jj++;
2514     }
2515   }
2516 
2517   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2518   ierr = PetscFree(buf);CHKERRQ(ierr);
2519   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2520   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2521   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2522   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2523   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2524   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 #undef __FUNCT__
2529 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2530 /*XXXXX@
2531    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2532 
2533    Input Parameters:
2534 .  mat  - the matrix
2535 .  fact - factor
2536 
2537    Not Collective on Mat, each process can have a different hash factor
2538 
2539    Level: advanced
2540 
2541   Notes:
2542    This can also be set by the command line option: -mat_use_hash_table fact
2543 
2544 .keywords: matrix, hashtable, factor, HT
2545 
2546 .seealso: MatSetOption()
2547 @XXXXX*/
2548 
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2552 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2553 {
2554   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2555   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2556   PetscReal      atmp;
2557   PetscReal      *work,*svalues,*rvalues;
2558   PetscErrorCode ierr;
2559   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2560   PetscMPIInt    rank,size;
2561   PetscInt       *rowners_bs,dest,count,source;
2562   PetscScalar    *va;
2563   MatScalar      *ba;
2564   MPI_Status     stat;
2565 
2566   PetscFunctionBegin;
2567   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2568   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2569   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2570 
2571   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2572   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2573 
2574   bs  = A->rmap->bs;
2575   mbs = a->mbs;
2576   Mbs = a->Mbs;
2577   ba  = b->a;
2578   bi  = b->i;
2579   bj  = b->j;
2580 
2581   /* find ownerships */
2582   rowners_bs = A->rmap->range;
2583 
2584   /* each proc creates an array to be distributed */
2585   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2586   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2587 
2588   /* row_max for B */
2589   if (rank != size-1) {
2590     for (i=0; i<mbs; i++) {
2591       ncols = bi[1] - bi[0]; bi++;
2592       brow  = bs*i;
2593       for (j=0; j<ncols; j++) {
2594         bcol = bs*(*bj);
2595         for (kcol=0; kcol<bs; kcol++) {
2596           col  = bcol + kcol;                /* local col index */
2597           col += rowners_bs[rank+1];      /* global col index */
2598           for (krow=0; krow<bs; krow++) {
2599             atmp = PetscAbsScalar(*ba); ba++;
2600             row  = brow + krow;   /* local row index */
2601             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2602             if (work[col] < atmp) work[col] = atmp;
2603           }
2604         }
2605         bj++;
2606       }
2607     }
2608 
2609     /* send values to its owners */
2610     for (dest=rank+1; dest<size; dest++) {
2611       svalues = work + rowners_bs[dest];
2612       count   = rowners_bs[dest+1]-rowners_bs[dest];
2613       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2614     }
2615   }
2616 
2617   /* receive values */
2618   if (rank) {
2619     rvalues = work;
2620     count   = rowners_bs[rank+1]-rowners_bs[rank];
2621     for (source=0; source<rank; source++) {
2622       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2623       /* process values */
2624       for (i=0; i<count; i++) {
2625         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2626       }
2627     }
2628   }
2629 
2630   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2631   ierr = PetscFree(work);CHKERRQ(ierr);
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatSOR_MPISBAIJ"
2637 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2638 {
2639   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2640   PetscErrorCode    ierr;
2641   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2642   PetscScalar       *x,*ptr,*from;
2643   Vec               bb1;
2644   const PetscScalar *b;
2645 
2646   PetscFunctionBegin;
2647   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2648   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2649 
2650   if (flag == SOR_APPLY_UPPER) {
2651     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2652     PetscFunctionReturn(0);
2653   }
2654 
2655   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2656     if (flag & SOR_ZERO_INITIAL_GUESS) {
2657       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2658       its--;
2659     }
2660 
2661     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2662     while (its--) {
2663 
2664       /* lower triangular part: slvec0b = - B^T*xx */
2665       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2666 
2667       /* copy xx into slvec0a */
2668       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2669       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2670       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2671       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2672 
2673       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2674 
2675       /* copy bb into slvec1a */
2676       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2677       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2678       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2679       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2680 
2681       /* set slvec1b = 0 */
2682       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2683 
2684       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2686       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* upper triangular part: bb1 = bb1 - B*x */
2690       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2691 
2692       /* local diagonal sweep */
2693       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2694     }
2695     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2696   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2697     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2698   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2699     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2700   } else if (flag & SOR_EISENSTAT) {
2701     Vec               xx1;
2702     PetscBool         hasop;
2703     const PetscScalar *diag;
2704     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2705     PetscInt          i,n;
2706 
2707     if (!mat->xx1) {
2708       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2709       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2710     }
2711     xx1 = mat->xx1;
2712     bb1 = mat->bb1;
2713 
2714     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2715 
2716     if (!mat->diag) {
2717       /* this is wrong for same matrix with new nonzero values */
2718       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2719       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2720     }
2721     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2722 
2723     if (hasop) {
2724       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2725       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2726     } else {
2727       /*
2728           These two lines are replaced by code that may be a bit faster for a good compiler
2729       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2730       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2731       */
2732       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2733       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2734       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2735       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2736       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2737       if (omega == 1.0) {
2738         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2739         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2740       } else {
2741         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2742         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2743       }
2744       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2745       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2746       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2747       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2748     }
2749 
2750     /* multiply off-diagonal portion of matrix */
2751     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2752     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2753     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2754     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2755     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2756     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2757     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2758     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2759     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2761 
2762     /* local sweep */
2763     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2764     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2765   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2771 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2772 {
2773   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2774   PetscErrorCode ierr;
2775   Vec            lvec1,bb1;
2776 
2777   PetscFunctionBegin;
2778   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2779   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2780 
2781   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2782     if (flag & SOR_ZERO_INITIAL_GUESS) {
2783       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2784       its--;
2785     }
2786 
2787     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2788     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2789     while (its--) {
2790       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2791 
2792       /* lower diagonal part: bb1 = bb - B^T*xx */
2793       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2794       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2795 
2796       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2797       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2798       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2799 
2800       /* upper diagonal part: bb1 = bb1 - B*x */
2801       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2802       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2803 
2804       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2805 
2806       /* diagonal sweep */
2807       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2808     }
2809     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2810     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2811   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2817 /*@
2818      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2819          CSR format the local rows.
2820 
2821    Collective on MPI_Comm
2822 
2823    Input Parameters:
2824 +  comm - MPI communicator
2825 .  bs - the block size, only a block size of 1 is supported
2826 .  m - number of local rows (Cannot be PETSC_DECIDE)
2827 .  n - This value should be the same as the local size used in creating the
2828        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2829        calculated if N is given) For square matrices n is almost always m.
2830 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2831 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2832 .   i - row indices
2833 .   j - column indices
2834 -   a - matrix values
2835 
2836    Output Parameter:
2837 .   mat - the matrix
2838 
2839    Level: intermediate
2840 
2841    Notes:
2842        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2843      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2844      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2845 
2846        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2847 
2848 .keywords: matrix, aij, compressed row, sparse, parallel
2849 
2850 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2851           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2852 @*/
2853 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2854 {
2855   PetscErrorCode ierr;
2856 
2857 
2858   PetscFunctionBegin;
2859   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2860   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2861   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2862   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2863   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2864   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2871 /*@C
2872    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2873    (the default parallel PETSc format).
2874 
2875    Collective on MPI_Comm
2876 
2877    Input Parameters:
2878 +  B - the matrix
2879 .  bs - the block size
2880 .  i - the indices into j for the start of each local row (starts with zero)
2881 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2882 -  v - optional values in the matrix
2883 
2884    Level: developer
2885 
2886 .keywords: matrix, aij, compressed row, sparse, parallel
2887 
2888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2889 @*/
2890 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2891 {
2892   PetscErrorCode ierr;
2893 
2894   PetscFunctionBegin;
2895   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 #undef __FUNCT__
2900 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2901 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2902 {
2903   PetscErrorCode ierr;
2904   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2905   PetscInt       *indx;
2906   PetscScalar    *values;
2907 
2908   PetscFunctionBegin;
2909   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2910   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2911     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2912     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2913     PetscInt       *bindx,rmax=a->rmax,j;
2914 
2915     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2916     mbs = m/bs; Nbs = N/cbs;
2917     if (n == PETSC_DECIDE) {
2918       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2919     }
2920     /* Check sum(n) = Nbs */
2921     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2922     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2923 
2924     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2925     rstart -= mbs;
2926 
2927     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2928     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2929     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2930     for (i=0; i<mbs; i++) {
2931       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2932       nnz = nnz/bs;
2933       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2934       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2935       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2936     }
2937     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2938     ierr = PetscFree(bindx);CHKERRQ(ierr);
2939 
2940     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2941     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2942     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2943     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2944     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2945     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2946   }
2947 
2948   /* numeric phase */
2949   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2950   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2951 
2952   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2953   for (i=0; i<m; i++) {
2954     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2955     Ii   = i + rstart;
2956     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2957     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2958   }
2959   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2960   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964