xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 9c8f2541a5996aa8eae70f697bc10dd8da5bda98)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
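/*
   The following two macros insert a single scalar into the local diagonal block (A, SeqSBAIJ) or the
   off-diagonal block (B, SeqBAIJ): the target block column is located with a truncated binary search
   followed by a short linear scan; an existing entry is updated in place, otherwise (unless nonew
   forbids new nonzeros) the row is reallocated if necessary, later blocks are shifted up one slot,
   and the freshly inserted block is zeroed before the value is written.
*/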
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j input by the user is ignored.
130 */
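/*
   Illustrative usage sketch (not part of this file; mat, row and ierr are assumed to exist in the
   caller): supply only entries with global row <= global column, and have lower triangular input
   dropped silently by enabling MAT_IGNORE_LOWER_TRIANGULAR first.

     PetscInt    cols[2] = {row,row+1};
     PetscScalar vals[2] = {4.0,-1.0};
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/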
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some Variables required in the macro */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (!baij->donotstash) {
165     if (n > baij->n_loc) {
166       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
167       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
168       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
170 
171       baij->n_loc = n;
172     }
173     in_loc = baij->in_loc;
174     v_loc  = baij->v_loc;
175   }
176 
177   for (i=0; i<m; i++) {
178     if (im[i] < 0) continue;
179 #if defined(PETSC_USE_DEBUG)
180     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
181 #endif
182     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
183       row = im[i] - rstart_orig;              /* local row index */
184       for (j=0; j<n; j++) {
185         if (im[i]/bs > in[j]/bs) {
186           if (a->ignore_ltriangular) {
187             continue;    /* ignore lower triangular blocks */
188         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
189         }
190         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
191           col  = in[j] - cstart_orig;         /* local col index */
192           brow = row/bs; bcol = col/bs;
193           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
194           if (roworiented) value = v[i*n+j];
195           else             value = v[i+j*m];
196           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
197           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
198         } else if (in[j] < 0) continue;
199 #if defined(PETSC_USE_DEBUG)
200         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
201 #endif
202         else {  /* off-diag entry (B) */
203           if (mat->was_assembled) {
204             if (!baij->colmap) {
205               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
206             }
207 #if defined(PETSC_USE_CTABLE)
208             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
209             col  = col - 1;
210 #else
211             col = baij->colmap[in[j]/bs] - 1;
212 #endif
213             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
214               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
215               col  =  in[j];
216               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
217               B    = baij->B;
218               b    = (Mat_SeqBAIJ*)(B)->data;
219               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
220               ba   = b->a;
221             } else col += in[j]%bs;
222           } else col = in[j];
223           if (roworiented) value = v[i*n+j];
224           else             value = v[i+j*m];
225           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         }
228       }
229     } else {  /* off processor entry */
230       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
231       if (!baij->donotstash) {
232         mat->assembled = PETSC_FALSE;
233         n_loc          = 0;
234         for (j=0; j<n; j++) {
235           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
236           in_loc[n_loc] = in[j];
237           if (roworiented) {
238             v_loc[n_loc] = v[i*n+j];
239           } else {
240             v_loc[n_loc] = v[j*m+i];
241           }
242           n_loc++;
243         }
244         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
245       }
246     }
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
253 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
254 {
255   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
256   const MatScalar *value;
257   MatScalar       *barray     =baij->barray;
258   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
259   PetscErrorCode  ierr;
260   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
261   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
262   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
263 
264   PetscFunctionBegin;
265   if (!barray) {
266     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
267     baij->barray = barray;
268   }
269 
270   if (roworiented) {
271     stepval = (n-1)*bs;
272   } else {
273     stepval = (m-1)*bs;
274   }
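  /* stepval = number of scalars to skip, after copying one bs-long row of a block, to reach the
     next row of that same block in the user's row- or column-oriented array v */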
275   for (i=0; i<m; i++) {
276     if (im[i] < 0) continue;
277 #if defined(PETSC_USE_DEBUG)
278     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
279 #endif
280     if (im[i] >= rstart && im[i] < rend) {
281       row = im[i] - rstart;
282       for (j=0; j<n; j++) {
283         if (im[i] > in[j]) {
284           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
285           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
286         }
287         /* If there is only a single block column (row-oriented) or block row (column-oriented), the user's block is already contiguous and no copy is required */
288         if ((roworiented) && (n == 1)) {
289           barray = (MatScalar*) v + i*bs2;
290         } else if ((!roworiented) && (m == 1)) {
291           barray = (MatScalar*) v + j*bs2;
292         } else { /* Here a copy is required */
293           if (roworiented) {
294             value = v + i*(stepval+bs)*bs + j*bs;
295           } else {
296             value = v + j*(stepval+bs)*bs + i*bs;
297           }
298           for (ii=0; ii<bs; ii++,value+=stepval) {
299             for (jj=0; jj<bs; jj++) {
300               *barray++ = *value++;
301             }
302           }
303           barray -=bs2;
304         }
305 
306         if (in[j] >= cstart && in[j] < cend) {
307           col  = in[j] - cstart;
308           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
309         } else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined(PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined(PETSC_USE_CTABLE)
330             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col  = in[j];
338             }
339           } else col = in[j];
340           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
341         }
342       }
343     } else {
344       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend) {
376           col  = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined(PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data--;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
396   }
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNCT__
401 #define __FUNCT__ "MatNorm_MPISBAIJ"
402 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
403 {
404   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
405   PetscErrorCode ierr;
406   PetscReal      sum[2],*lnorm2;
407 
408   PetscFunctionBegin;
409   if (baij->size == 1) {
410     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
411   } else {
412     if (type == NORM_FROBENIUS) {
413       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
414       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
415       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
416       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
418       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
419       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
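      /* norm(A) of the SeqSBAIJ block is assumed to already cover both triangles of the symmetric
         diagonal block, while B is stored only once, hence ||mat||_F = sqrt(||A||_F^2 + 2*||B||_F^2) */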
420       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
421     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
422       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
423       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
424       PetscReal    *rsum,*rsum2,vabs;
425       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
426       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
427       MatScalar    *v;
428 
429       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
430       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
431       /* Amat */
432       v = amat->a; jj = amat->j;
433       for (brow=0; brow<mbs; brow++) {
434         grow = bs*(rstart + brow);
435         nz   = amat->i[brow+1] - amat->i[brow];
436         for (bcol=0; bcol<nz; bcol++) {
437           gcol = bs*(rstart + *jj); jj++;
438           for (col=0; col<bs; col++) {
439             for (row=0; row<bs; row++) {
440               vabs            = PetscAbsScalar(*v); v++;
441               rsum[gcol+col] += vabs;
442               /* non-diagonal block */
443               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       /* Bmat */
449       v = bmat->a; jj = bmat->j;
450       for (brow=0; brow<mbs; brow++) {
451         grow = bs*(rstart + brow);
452         nz = bmat->i[brow+1] - bmat->i[brow];
453         for (bcol=0; bcol<nz; bcol++) {
454           gcol = bs*garray[*jj]; jj++;
455           for (col=0; col<bs; col++) {
456             for (row=0; row<bs; row++) {
457               vabs            = PetscAbsScalar(*v); v++;
458               rsum[gcol+col] += vabs;
459               rsum[grow+row] += vabs;
460             }
461           }
462         }
463       }
464       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
465       *norm = 0.0;
466       for (col=0; col<mat->cmap->N; col++) {
467         if (rsum2[col] > *norm) *norm = rsum2[col];
468       }
469       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
470     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
477 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
478 {
479   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
480   PetscErrorCode ierr;
481   PetscInt       nstash,reallocs;
482   InsertMode     addv;
483 
484   PetscFunctionBegin;
485   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
486 
487   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
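  /* (INSERT_VALUES and ADD_VALUES occupy distinct bits of InsertMode, so the bitwise OR over all
     ranks equals ADD_VALUES|INSERT_VALUES exactly when different ranks used different modes) */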
488   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
489   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
490   mat->insertmode = addv; /* in case this processor had no cache */
491 
492   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
493   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
494   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
495   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
496   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
497   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
503 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
504 {
505   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
506   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
507   PetscErrorCode ierr;
508   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
509   PetscInt       *row,*col;
510   PetscBool      other_disassembled;
511   PetscMPIInt    n;
512   PetscBool      r1,r2,r3;
513   MatScalar      *val;
514   InsertMode     addv = mat->insertmode;
515 
516   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
517   PetscFunctionBegin;
518   if (!baij->donotstash &&  !mat->nooffprocentries) {
519     while (1) {
520       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
521       if (!flg) break;
522 
523       for (i=0; i<n;) {
524         /* Now identify the consecutive vals belonging to the same row */
525         for (j=i,rstart=row[j]; j<n; j++) {
526           if (row[j] != rstart) break;
527         }
528         if (j < n) ncols = j-i;
529         else       ncols = n-i;
530         /* Now assemble all these values with a single function call */
531         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
532         i    = j;
533       }
534     }
535     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
536     /* Now process the block-stash. Since the values are stashed column-oriented,
537        temporarily clear the roworiented flags so MatSetValuesBlocked() interprets them as
538        column-oriented, and restore the original flags afterwards */
539     r1 = baij->roworiented;
540     r2 = a->roworiented;
541     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
542 
543     baij->roworiented = PETSC_FALSE;
544     a->roworiented    = PETSC_FALSE;
545 
546     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
547     while (1) {
548       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
549       if (!flg) break;
550 
551       for (i=0; i<n;) {
552         /* Now identify the consecutive vals belonging to the same row */
553         for (j=i,rstart=row[j]; j<n; j++) {
554           if (row[j] != rstart) break;
555         }
556         if (j < n) ncols = j-i;
557         else       ncols = n-i;
558         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
559         i    = j;
560       }
561     }
562     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
563 
564     baij->roworiented = r1;
565     a->roworiented    = r2;
566 
567     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
568   }
569 
570   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
571   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
572 
573   /* determine if any processor has disassembled, if so we must
574      also disassemble ourselves, in order that we may reassemble. */
575   /*
576      if nonzero structure of submatrix B cannot change then we know that
577      no processor disassembled thus we can skip this stuff
578   */
579   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
580     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
581     if (mat->was_assembled && !other_disassembled) {
582       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
583     }
584   }
585 
586   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
587     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
588   }
589   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
590   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
591 
592   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
593 
594   baij->rowvalues = 0;
595 
596   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
597   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
598     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
599     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #include <petscdraw.h>
607 #undef __FUNCT__
608 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
609 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
612   PetscErrorCode    ierr;
613   PetscInt          bs   = mat->rmap->bs;
614   PetscMPIInt       rank = baij->rank;
615   PetscBool         iascii,isdraw;
616   PetscViewer       sviewer;
617   PetscViewerFormat format;
618 
619   PetscFunctionBegin;
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
621   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
622   if (iascii) {
623     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
624     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
625       MatInfo info;
626       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
634       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
635       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
636       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
637       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_INFO) {
640       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
643       PetscFunctionReturn(0);
644     }
645   }
646 
647   if (isdraw) {
648     PetscDraw draw;
649     PetscBool isnull;
650     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
651     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
652   }
653 
654   {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
676     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++;
687           a += bs;
688         }
689       }
690     }
691     /* copy over the B part */
692     Bloc = (Mat_SeqBAIJ*)baij->B->data;
693     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
694     for (i=0; i<mbs; i++) {
695 
696       rvals[0] = bs*(baij->rstartbs + i);
697       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
698       for (j=ai[i]; j<ai[i+1]; j++) {
699         col = baij->garray[aj[j]]*bs;
700         for (k=0; k<bs; k++) {
701           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
702           col++;
703           a += bs;
704         }
705       }
706     }
707     ierr = PetscFree(rvals);CHKERRQ(ierr);
708     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     /*
711        Everyone has to call to draw the matrix since the graphics waits are
712        synchronized across all processors that share the PetscDraw object
713     */
714     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     if (!rank) {
716         ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
717     }
718     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = MatDestroy(&A);CHKERRQ(ierr);
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatView_MPISBAIJ"
726 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
727 {
728   PetscErrorCode ierr;
729   PetscBool      iascii,isdraw,issocket,isbinary;
730 
731   PetscFunctionBegin;
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
736   if (iascii || isdraw || issocket || isbinary) {
737     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPISBAIJ"
744 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
752 #endif
753   ierr = MatDestroy_Redundant(&mat->redundant);CHKERRQ(ierr);
754   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
755   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
756   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
757   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
758 #if defined(PETSC_USE_CTABLE)
759   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
760 #else
761   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
762 #endif
763   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
765   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
771   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
772   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
773   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
774   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
776   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
778 #if defined(PETSC_USE_REAL_MAT_SINGLE)
779   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
780 #endif
781   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
784   ierr = PetscFree(mat->data);CHKERRQ(ierr);
785 
786   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
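/*
   Note on the multiply scheme used by MatMult_MPISBAIJ_Hermitian and MatMult_MPISBAIJ below
   (a rough sketch inferred from the code): only the upper triangular part is stored, as the
   diagonal block A (SeqSBAIJ) plus the off-diagonal block B (SeqBAIJ).  The routines form
   slvec1a = A*x and slvec0 = [x ; B^T*x] (B^H*x in the Hermitian case); a single ADD_VALUES
   scatter of slvec0 into slvec1 = [slvec1a ; slvec1b] then both accumulates the remote B^T*x
   contributions (the subdiagonal part) into the owned rows and gathers the ghost values of x
   into slvec1b; finally yy = B*slvec1b + slvec1a adds the superdiagonal part.
*/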
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
798 {
799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
800   PetscErrorCode ierr;
801   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
802   PetscScalar    *x,*from;
803 
804   PetscFunctionBegin;
805   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
806   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
807 
808   /* diagonal part */
809   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
810   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
811 
812   /* subdiagonal part */
813   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
814 
815   /* copy x into the vec slvec0 */
816   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
817   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
818 
819   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
820   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
821   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
822 
823   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   /* superdiagonal part */
826   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatMult_MPISBAIJ"
832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
833 {
834   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
835   PetscErrorCode ierr;
836   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
837   PetscScalar    *x,*from;
838 
839   PetscFunctionBegin;
840   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
841   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
842 
843   /* diagonal part */
844   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
845   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
846 
847   /* subdiagonal part */
848   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
849 
850   /* copy x into the vec slvec0 */
851   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
852   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
853 
854   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
855   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
856   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
857 
858   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860   /* superdiagonal part */
861   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
868 {
869   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
870   PetscErrorCode ierr;
871   PetscInt       nt;
872 
873   PetscFunctionBegin;
874   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
875   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
876 
877   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
878   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
879 
880   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   /* do diagonal part */
882   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
883   /* do superdiagonal part */
884   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
886   /* do subdiagonal part */
887   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
888   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
889   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
895 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
900   PetscScalar    *x,*from,zero=0.0;
901 
902   PetscFunctionBegin;
903   /*
904   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
905   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
906   */
907   /* diagonal part */
908   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
909   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
910 
911   /* subdiagonal part */
912   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
913 
914   /* copy x into the vec slvec0 */
915   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
916   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
917   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
918   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
919 
920   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
922   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923 
924   /* superdiagonal part */
925   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
931 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
932 {
933   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
934   PetscErrorCode ierr;
935 
936   PetscFunctionBegin;
937   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
938   /* do diagonal part */
939   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
940   /* do superdiagonal part */
941   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
943 
944   /* do subdiagonal part */
945   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
946   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 /*
952   This only works correctly for square matrices where the subblock A->A is the
953    diagonal block
954 */
955 #undef __FUNCT__
956 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
957 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
958 {
959   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
964   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatScale_MPISBAIJ"
970 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
977   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatGetRow_MPISBAIJ"
983 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
984 {
985   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
986   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
987   PetscErrorCode ierr;
988   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
989   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
990   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
991 
992   PetscFunctionBegin;
993   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
994   mat->getrowactive = PETSC_TRUE;
995 
996   if (!mat->rowvalues && (idx || v)) {
997     /*
998         allocate enough space to hold information from the longest row.
999     */
1000     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1001     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1002     PetscInt     max = 1,mbs = mat->mbs,tmp;
1003     for (i=0; i<mbs; i++) {
1004       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1005       if (max < tmp) max = tmp;
1006     }
1007     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1008   }
1009 
1010   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1011   lrow = row - brstart;  /* local row index */
1012 
1013   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1014   if (!v)   {pvA = 0; pvB = 0;}
1015   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1016   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1017   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1018   nztot = nzA + nzB;
1019 
1020   cmap = mat->garray;
1021   if (v  || idx) {
1022     if (nztot) {
1023       /* Sort by increasing column numbers, assuming A and B already sorted */
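      /* imark counts the leading B entries whose global block column lies left of the diagonal
         block (cmap[...] < cstart); the merged row is then ordered [B-left | A | B-right] */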
1024       PetscInt imark = -1;
1025       if (v) {
1026         *v = v_p = mat->rowvalues;
1027         for (i=0; i<nzB; i++) {
1028           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1029           else break;
1030         }
1031         imark = i;
1032         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1033         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1034       }
1035       if (idx) {
1036         *idx = idx_p = mat->rowindices;
1037         if (imark > -1) {
1038           for (i=0; i<imark; i++) {
1039             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1040           }
1041         } else {
1042           for (i=0; i<nzB; i++) {
1043             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1044             else break;
1045           }
1046           imark = i;
1047         }
1048         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1049         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050       }
1051     } else {
1052       if (idx) *idx = 0;
1053       if (v)   *v   = 0;
1054     }
1055   }
1056   *nz  = nztot;
1057   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1058   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1064 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1065 {
1066   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1067 
1068   PetscFunctionBegin;
1069   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1070   baij->getrowactive = PETSC_FALSE;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1076 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1077 {
1078   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1079   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1080 
1081   PetscFunctionBegin;
1082   aA->getrow_utriangular = PETSC_TRUE;
1083   PetscFunctionReturn(0);
1084 }
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1087 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1088 {
1089   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1090   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1091 
1092   PetscFunctionBegin;
1093   aA->getrow_utriangular = PETSC_FALSE;
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1099 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1100 {
1101   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1106   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1112 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1113 {
1114   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1115   PetscErrorCode ierr;
1116 
1117   PetscFunctionBegin;
1118   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1119   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1125 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1126 {
1127   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1132   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1138 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1139 {
1140   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1141   Mat            A  = a->A,B = a->B;
1142   PetscErrorCode ierr;
1143   PetscReal      isend[5],irecv[5];
1144 
1145   PetscFunctionBegin;
1146   info->block_size = (PetscReal)matin->rmap->bs;
1147 
1148   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1149 
1150   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1151   isend[3] = info->memory;  isend[4] = info->mallocs;
1152 
1153   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1154 
1155   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1156   isend[3] += info->memory;  isend[4] += info->mallocs;
1157   if (flag == MAT_LOCAL) {
1158     info->nz_used      = isend[0];
1159     info->nz_allocated = isend[1];
1160     info->nz_unneeded  = isend[2];
1161     info->memory       = isend[3];
1162     info->mallocs      = isend[4];
1163   } else if (flag == MAT_GLOBAL_MAX) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1165 
1166     info->nz_used      = irecv[0];
1167     info->nz_allocated = irecv[1];
1168     info->nz_unneeded  = irecv[2];
1169     info->memory       = irecv[3];
1170     info->mallocs      = irecv[4];
1171   } else if (flag == MAT_GLOBAL_SUM) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1173 
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1180   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1181   info->fill_ratio_needed = 0;
1182   info->factor_mallocs    = 0;
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1189 {
1190   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1191   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   switch (op) {
1196   case MAT_NEW_NONZERO_LOCATIONS:
1197   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1198   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1199   case MAT_KEEP_NONZERO_PATTERN:
1200   case MAT_NEW_NONZERO_LOCATION_ERR:
1201     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROW_ORIENTED:
1205     a->roworiented = flg;
1206 
1207     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1208     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1209     break;
1210   case MAT_NEW_DIAGONALS:
1211     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1212     break;
1213   case MAT_IGNORE_OFF_PROC_ENTRIES:
1214     a->donotstash = flg;
1215     break;
1216   case MAT_USE_HASH_TABLE:
1217     a->ht_flag = flg;
1218     break;
1219   case MAT_HERMITIAN:
1220     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1221     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1222 
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set = PETSC_TRUE;
1227     A->spd     = flg;
1228     if (flg) {
1229       A->symmetric                  = PETSC_TRUE;
1230       A->structurally_symmetric     = PETSC_TRUE;
1231       A->symmetric_set              = PETSC_TRUE;
1232       A->structurally_symmetric_set = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265 
1266   PetscFunctionBegin;
1267   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1268     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1275 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1276 {
1277   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1278   Mat            a     = baij->A, b=baij->B;
1279   PetscErrorCode ierr;
1280   PetscInt       nv,m,n;
1281   PetscBool      flg;
1282 
1283   PetscFunctionBegin;
1284   if (ll != rr) {
1285     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1286     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1287   }
1288   if (!ll) PetscFunctionReturn(0);
1289 
1290   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1291   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1292 
1293   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1294   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1295 
1296   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297 
1298   /* left diagonalscale the off-diagonal part */
1299   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1300 
1301   /* scale the diagonal part */
1302   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1303 
1304   /* right diagonalscale the off-diagonal part */
1305   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
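/*
   Illustrative sketch (not part of this file; mat, d and ierr are assumed to exist in the caller,
   with d matching mat's row layout): since the format is symmetric, only the symmetric scaling
   D*A*D with the same vector on both sides is supported, e.g. scaling by diag(A)^{-1/2}:

     ierr = MatGetDiagonal(mat,d);CHKERRQ(ierr);
     ierr = VecSqrtAbs(d);CHKERRQ(ierr);
     ierr = VecReciprocal(d);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/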
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscBool      flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1368 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1380 {
1381   PetscErrorCode ierr;
1382   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1383   PetscBLASInt   bnz,one=1;
1384   Mat_SeqSBAIJ   *xa,*ya;
1385   Mat_SeqBAIJ    *xb,*yb;
1386 
1387   PetscFunctionBegin;
1388   if (str == SAME_NONZERO_PATTERN) {
1389     PetscScalar alpha = a;
1390     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1391     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1392     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1393     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1394     xb   = (Mat_SeqBAIJ*)xx->B->data;
1395     yb   = (Mat_SeqBAIJ*)yy->B->data;
1396     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1397     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1398     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1399   } else {
1400     Mat      B;
1401     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1402     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1403     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1404     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1405     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1406     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1407     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1408     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1409     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1410     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1411     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1412     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1413     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1414     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1415     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1416     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1417     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1418     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1419     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1420     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
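
/*
   Usage sketch (illustrative, not part of this file): Y <- a*X + Y for two assembled
   MPISBAIJ matrices.  With SAME_NONZERO_PATTERN the values are combined in place via
   BLAS axpy; any other structure flag triggers the re-preallocation path above.

     MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);
*/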
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1427 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1428 {
1429   PetscErrorCode ierr;
1430   PetscInt       i;
1431   PetscBool      flg;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1435   for (i=0; i<n; i++) {
1436     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1437     if (!flg) { /* *B[i] is non-symmetric, set flag */
1438       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1439     }
1440   }
1441   PetscFunctionReturn(0);
1442 }
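
/*
   Usage sketch (illustrative, not part of this file): extracting one sequential
   submatrix per process; when irow and icol are identical the symmetric flag is kept,
   otherwise the routine above marks the result non-symmetric.  Here nloc and first are
   placeholder local sizes/offsets.

     IS  is;
     Mat *submat;
     ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&is);
     MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);
     MatDestroyMatrices(1,&submat);
     ISDestroy(&is);
*/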
1443 
1444 /* -------------------------------------------------------------------*/
1445 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1446                                        MatGetRow_MPISBAIJ,
1447                                        MatRestoreRow_MPISBAIJ,
1448                                        MatMult_MPISBAIJ,
1449                                /*  4*/ MatMultAdd_MPISBAIJ,
1450                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1451                                        MatMultAdd_MPISBAIJ,
1452                                        0,
1453                                        0,
1454                                        0,
1455                                /* 10*/ 0,
1456                                        0,
1457                                        0,
1458                                        MatSOR_MPISBAIJ,
1459                                        MatTranspose_MPISBAIJ,
1460                                /* 15*/ MatGetInfo_MPISBAIJ,
1461                                        MatEqual_MPISBAIJ,
1462                                        MatGetDiagonal_MPISBAIJ,
1463                                        MatDiagonalScale_MPISBAIJ,
1464                                        MatNorm_MPISBAIJ,
1465                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1466                                        MatAssemblyEnd_MPISBAIJ,
1467                                        MatSetOption_MPISBAIJ,
1468                                        MatZeroEntries_MPISBAIJ,
1469                                /* 24*/ 0,
1470                                        0,
1471                                        0,
1472                                        0,
1473                                        0,
1474                                /* 29*/ MatSetUp_MPISBAIJ,
1475                                        0,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                /* 34*/ MatDuplicate_MPISBAIJ,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                        0,
1484                                /* 39*/ MatAXPY_MPISBAIJ,
1485                                        MatGetSubMatrices_MPISBAIJ,
1486                                        MatIncreaseOverlap_MPISBAIJ,
1487                                        MatGetValues_MPISBAIJ,
1488                                        MatCopy_MPISBAIJ,
1489                                /* 44*/ 0,
1490                                        MatScale_MPISBAIJ,
1491                                        0,
1492                                        0,
1493                                        0,
1494                                /* 49*/ 0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                        0,
1499                                /* 54*/ 0,
1500                                        0,
1501                                        MatSetUnfactored_MPISBAIJ,
1502                                        0,
1503                                        MatSetValuesBlocked_MPISBAIJ,
1504                                /* 59*/ 0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                        0,
1509                                /* 64*/ 0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                        0,
1514                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                /* 74*/ 0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                /* 79*/ 0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                        MatLoad_MPISBAIJ,
1529                                /* 84*/ 0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                /* 89*/ 0,
1535                                        0,
1536                                        0,
1537                                        0,
1538                                        0,
1539                                /* 94*/ 0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                /* 99*/ 0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                /*104*/ 0,
1550                                        MatRealPart_MPISBAIJ,
1551                                        MatImaginaryPart_MPISBAIJ,
1552                                        MatGetRowUpperTriangular_MPISBAIJ,
1553                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1554                                /*109*/ 0,
1555                                        MatGetRedundantMatrix_MPISBAIJ,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /*114*/ 0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /*119*/ 0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /*124*/ 0,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                        0,
1574                                /*129*/ 0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                        0,
1579                                /*134*/ 0,
1580                                        0,
1581                                        0,
1582                                        0,
1583                                        0,
1584                                /*139*/ 0,
1585                                        0,
1586                                        0
1587 };
1588 
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1592 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1593 {
1594   PetscFunctionBegin;
1595   *a = ((Mat_MPISBAIJ*)A->data)->A;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1601 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1602 {
1603   Mat_MPISBAIJ   *b;
1604   PetscErrorCode ierr;
1605   PetscInt       i,mbs,Mbs;
1606 
1607   PetscFunctionBegin;
1608   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1609   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1610   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1611   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1612 
1613   b   = (Mat_MPISBAIJ*)B->data;
1614   mbs = B->rmap->n/bs;
1615   Mbs = B->rmap->N/bs;
1616   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1617 
1618   B->rmap->bs = bs;
1619   b->bs2      = bs*bs;
1620   b->mbs      = mbs;
1621   b->Mbs      = Mbs;
1622   b->nbs      = B->cmap->n/bs;
1623   b->Nbs      = B->cmap->N/bs;
1624 
1625   for (i=0; i<=b->size; i++) {
1626     b->rangebs[i] = B->rmap->range[i]/bs;
1627   }
1628   b->rstartbs = B->rmap->rstart/bs;
1629   b->rendbs   = B->rmap->rend/bs;
1630 
1631   b->cstartbs = B->cmap->rstart/bs;
1632   b->cendbs   = B->cmap->rend/bs;
1633 
1634   if (!B->preallocated) {
1635     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1636     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1637     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1638     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1639     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1640     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1641     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1642     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1643     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1644   }
1645 
1646   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1647   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1648 
1649   B->preallocated = PETSC_TRUE;
1650   PetscFunctionReturn(0);
1651 }
1652 
1653 #undef __FUNCT__
1654 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1655 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1656 {
1657   PetscInt       m,rstart,cstart,cend;
1658   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1659   const PetscInt *JJ    =0;
1660   PetscScalar    *values=0;
1661   PetscErrorCode ierr;
1662 
1663   PetscFunctionBegin;
1664   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1665   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1666   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1667   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1668   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1669   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1670   m      = B->rmap->n/bs;
1671   rstart = B->rmap->rstart/bs;
1672   cstart = B->cmap->rstart/bs;
1673   cend   = B->cmap->rend/bs;
1674 
1675   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1676   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1677   for (i=0; i<m; i++) {
1678     nz = ii[i+1] - ii[i];
1679     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1680     nz_max = PetscMax(nz_max,nz);
1681     JJ     = jj + ii[i];
1682     for (j=0; j<nz; j++) {
1683       if (*JJ >= cstart) break;
1684       JJ++;
1685     }
1686     d = 0;
1687     for (; j<nz; j++) {
1688       if (*JJ++ >= cend) break;
1689       d++;
1690     }
1691     d_nnz[i] = d;
1692     o_nnz[i] = nz - d;
1693   }
1694   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1695   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1696 
1697   values = (PetscScalar*)V;
1698   if (!values) {
1699     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1700     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1701   }
1702   for (i=0; i<m; i++) {
1703     PetscInt          row    = i + rstart;
1704     PetscInt          ncols  = ii[i+1] - ii[i];
1705     const PetscInt    *icols = jj + ii[i];
1706     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1707     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1708   }
1709 
1710   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1711   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1712   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1713   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
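
/*
   Usage sketch (illustrative, not part of this file): preallocating through the public
   interface MatMPISBAIJSetPreallocationCSR().  Here ii/jj describe two local block rows
   holding 2 and 1 upper-triangular blocks respectively, and V is NULL so only the
   nonzero pattern is preallocated.

     PetscInt ii[] = {0,2,3};
     PetscInt jj[] = {0,1,1};
     MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,NULL);
*/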
1716 
1717 #if defined(PETSC_HAVE_MUMPS)
1718 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1719 #endif
1720 #if defined(PETSC_HAVE_PASTIX)
1721 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1722 #endif
1723 
1724 /*MC
1725    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1726    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1727    the matrix is stored.
1728 
1729   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1730   can call MatSetOption(Mat, MAT_HERMITIAN);
1731 
1732    Options Database Keys:
1733 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1734 
1735   Level: beginner
1736 
1737 .seealso: MatCreateMPISBAIJ
1738 M*/
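
/*
   Usage sketch (illustrative, not part of this file; M is a placeholder global size):
   selecting this type explicitly and, for complex builds, requesting Hermitian rather
   than plain symmetric behaviour as described above.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(A,MATMPISBAIJ);
     MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
*/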
1739 
1740 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCreate_MPISBAIJ"
1744 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1745 {
1746   Mat_MPISBAIJ   *b;
1747   PetscErrorCode ierr;
1748   PetscBool      flg;
1749 
1750   PetscFunctionBegin;
1751   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1752   B->data = (void*)b;
1753   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1754 
1755   B->ops->destroy = MatDestroy_MPISBAIJ;
1756   B->ops->view    = MatView_MPISBAIJ;
1757   B->assembled    = PETSC_FALSE;
1758   B->insertmode   = NOT_SET_VALUES;
1759 
1760   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1761   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1762 
1763   /* build local table of row and column ownerships */
1764   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1765 
1766   /* build cache for off array entries formed */
1767   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1768 
1769   b->donotstash  = PETSC_FALSE;
1770   b->colmap      = NULL;
1771   b->garray      = NULL;
1772   b->roworiented = PETSC_TRUE;
1773 
1774   /* stuff used in block assembly */
1775   b->barray = 0;
1776 
1777   /* stuff used for matrix vector multiply */
1778   b->lvec    = 0;
1779   b->Mvctx   = 0;
1780   b->slvec0  = 0;
1781   b->slvec0b = 0;
1782   b->slvec1  = 0;
1783   b->slvec1a = 0;
1784   b->slvec1b = 0;
1785   b->sMvctx  = 0;
1786 
1787   /* stuff for MatGetRow() */
1788   b->rowindices   = 0;
1789   b->rowvalues    = 0;
1790   b->getrowactive = PETSC_FALSE;
1791 
1792   /* hash table stuff */
1793   b->ht           = 0;
1794   b->hd           = 0;
1795   b->ht_size      = 0;
1796   b->ht_flag      = PETSC_FALSE;
1797   b->ht_fact      = 0;
1798   b->ht_total_ct  = 0;
1799   b->ht_insert_ct = 0;
1800 
1801   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1802   b->ijonly = PETSC_FALSE;
1803 
1804   b->in_loc = 0;
1805   b->v_loc  = 0;
1806   b->n_loc  = 0;
1807   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1808   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1809   if (flg) {
1810     PetscReal fact = 1.39;
1811     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1812     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1813     if (fact <= 1.0) fact = 1.39;
1814     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1815     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1816   }
1817   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1818 
1819 #if defined(PETSC_HAVE_PASTIX)
1820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1821 #endif
1822 #if defined(PETSC_HAVE_MUMPS)
1823   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1824 #endif
1825   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1831 
1832   B->symmetric                  = PETSC_TRUE;
1833   B->structurally_symmetric     = PETSC_TRUE;
1834   B->symmetric_set              = PETSC_TRUE;
1835   B->structurally_symmetric_set = PETSC_TRUE;
1836 
1837   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
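
/*
   Usage sketch (illustrative, not part of this file): the hash-table assembly path
   registered above is normally enabled from the command line, e.g.

     ./myapp -mat_type mpisbaij -mat_use_hash_table

   in which case the hash-table factor defaults to 1.39 (values <= 1.0 are reset to it).
*/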
1840 
1841 /*MC
1842    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1843 
1844    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1845    and MATMPISBAIJ otherwise.
1846 
1847    Options Database Keys:
1848 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1849 
1850   Level: beginner
1851 
1852 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1853 M*/
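
/*
   Usage sketch (illustrative, not part of this file; comm and M are placeholders):
   letting the communicator size pick the sequential or parallel variant at runtime,
   as described above.

     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
     MatSetType(A,MATSBAIJ);
     MatSetFromOptions(A);
*/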
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1857 /*@C
1858    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1859    the user should preallocate the matrix storage by setting the parameters
1860    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1861    performance can be increased by more than a factor of 50.
1862 
1863    Collective on Mat
1864 
1865    Input Parameters:
1866 +  B - the matrix
1867 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1868           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1869 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1870            submatrix  (same for all local rows)
1871 .  d_nnz - array containing the number of block nonzeros in the various block rows
1872            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1873            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1874            for the diagonal entry and set a value even if it is zero.
1875 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1876            submatrix (same for all local rows).
1877 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1878            off-diagonal portion of the local submatrix that is right of the diagonal
1879            (possibly different for each block row) or NULL.
1880 
1881 
1882    Options Database Keys:
1883 +   -mat_no_unroll - uses code that does not unroll the loops in the
1884                      block calculations (much slower)
1885 -   -mat_block_size - size of the blocks to use
1886 
1887    Notes:
1888 
1889    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1890    then it must be used on all processors that share the object for that argument.
1891 
1892    If the *_nnz parameter is given then the *_nz parameter is ignored
1893 
1894    Storage Information:
1895    For a square global matrix we define each processor's diagonal portion
1896    to be its local rows and the corresponding columns (a square submatrix);
1897    each processor's off-diagonal portion encompasses the remainder of the
1898    local matrix (a rectangular submatrix).
1899 
1900    The user can specify preallocated storage for the diagonal part of
1901    the local submatrix with either d_nz or d_nnz (not both).  Set
1902    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1903    memory allocation.  Likewise, specify preallocated storage for the
1904    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1905 
1906    You can call MatGetInfo() to get information on how effective the preallocation was;
1907    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1908    You can also run with the option -info and look for messages with the string
1909    malloc in them to see if additional memory allocation was needed.
1910 
1911    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1912    the figure below we depict these three local rows and all columns (0-11).
1913 
1914 .vb
1915            0 1 2 3 4 5 6 7 8 9 10 11
1916           --------------------------
1917    row 3  |. . . d d d o o o o  o  o
1918    row 4  |. . . d d d o o o o  o  o
1919    row 5  |. . . d d d o o o o  o  o
1920           --------------------------
1921 .ve
1922 
1923    Thus, any entries in the d locations are stored in the d (diagonal)
1924    submatrix, and any entries in the o locations are stored in the
1925    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1926    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1927 
1928    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1929    plus the diagonal part of the d matrix,
1930    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1931 
1932    In general, for PDE problems in which most nonzeros are near the diagonal,
1933    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1934    or you will get TERRIBLE performance; see the users' manual chapter on
1935    matrices.
1936 
1937    Level: intermediate
1938 
1939 .keywords: matrix, block, aij, compressed row, sparse, parallel
1940 
1941 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1942 @*/
1943 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1944 {
1945   PetscErrorCode ierr;
1946 
1947   PetscFunctionBegin;
1948   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1949   PetscValidType(B,1);
1950   PetscValidLogicalCollectiveInt(B,bs,2);
1951   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1952   PetscFunctionReturn(0);
1953 }
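
/*
   Preallocation sketch (illustrative, not part of this file) for the rows 3-5 figure in
   the manual page above with bs = 1: on the process owning those rows the upper
   triangle of the diagonal portion holds 3, 2 and 1 blocks, and the off-diagonal
   portion holds 6 blocks per row.

     PetscInt d_nnz[3] = {3,2,1};
     PetscInt o_nnz[3] = {6,6,6};
     MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);
*/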
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatCreateSBAIJ"
1957 /*@C
1958    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1959    (block compressed row).  For good matrix assembly performance
1960    the user should preallocate the matrix storage by setting the parameters
1961    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1962    performance can be increased by more than a factor of 50.
1963 
1964    Collective on MPI_Comm
1965 
1966    Input Parameters:
1967 +  comm - MPI communicator
1968 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1969           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1970 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1971            This value should be the same as the local size used in creating the
1972            y vector for the matrix-vector product y = Ax.
1973 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1974            This value should be the same as the local size used in creating the
1975            x vector for the matrix-vector product y = Ax.
1976 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1977 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1978 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1979            submatrix  (same for all local rows)
1980 .  d_nnz - array containing the number of block nonzeros in the various block rows
1981            in the upper triangular portion of the diagonal portion of the local submatrix
1982            (possibly different for each block row) or NULL.
1983            If you plan to factor the matrix you must leave room for the diagonal entry and
1984            set its value even if it is zero.
1985 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1986            submatrix (same for all local rows).
1987 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1988            off-diagonal portion of the local submatrix (possibly different for
1989            each block row) or NULL.
1990 
1991    Output Parameter:
1992 .  A - the matrix
1993 
1994    Options Database Keys:
1995 +   -mat_no_unroll - uses code that does not unroll the loops in the
1996                      block calculations (much slower)
1997 .   -mat_block_size - size of the blocks to use
1998 -   -mat_mpi - use the parallel matrix data structures even on one processor
1999                (defaults to using SeqSBAIJ format on one processor)
2000 
2001    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2002    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2003    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2004 
2005    Notes:
2006    The number of rows and columns must be divisible by blocksize.
2007    This matrix type does not support complex Hermitian operations.
2008 
2009    The user MUST specify either the local or global matrix dimensions
2010    (possibly both).
2011 
2012    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2013    then it must be used on all processors that share the object for that argument.
2014 
2015    If the *_nnz parameter is given then the *_nz parameter is ignored
2016 
2017    Storage Information:
2018    For a square global matrix we define each processor's diagonal portion
2019    to be its local rows and the corresponding columns (a square submatrix);
2020    each processor's off-diagonal portion encompasses the remainder of the
2021    local matrix (a rectangular submatrix).
2022 
2023    The user can specify preallocated storage for the diagonal part of
2024    the local submatrix with either d_nz or d_nnz (not both).  Set
2025    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2026    memory allocation.  Likewise, specify preallocated storage for the
2027    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2028 
2029    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2030    the figure below we depict these three local rows and all columns (0-11).
2031 
2032 .vb
2033            0 1 2 3 4 5 6 7 8 9 10 11
2034           --------------------------
2035    row 3  |. . . d d d o o o o  o  o
2036    row 4  |. . . d d d o o o o  o  o
2037    row 5  |. . . d d d o o o o  o  o
2038           --------------------------
2039 .ve
2040 
2041    Thus, any entries in the d locations are stored in the d (diagonal)
2042    submatrix, and any entries in the o locations are stored in the
2043    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2044    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2045 
2046    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2047    plus the diagonal part of the d matrix,
2048    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2049    In general, for PDE problems in which most nonzeros are near the diagonal,
2050    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2051    or you will get TERRIBLE performance; see the users' manual chapter on
2052    matrices.
2053 
2054    Level: intermediate
2055 
2056 .keywords: matrix, block, aij, compressed row, sparse, parallel
2057 
2058 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2059 @*/
2060 
2061 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2062 {
2063   PetscErrorCode ierr;
2064   PetscMPIInt    size;
2065 
2066   PetscFunctionBegin;
2067   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2068   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2070   if (size > 1) {
2071     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2072     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2073   } else {
2074     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2075     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2076   }
2077   PetscFunctionReturn(0);
2078 }
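
/*
   Usage sketch (illustrative, not part of this file): the convenience constructor for a
   bs = 2 matrix with 8 local rows (4 block rows), preallocating 3 diagonal-portion and
   1 off-diagonal-portion blocks per block row on every process.

     Mat A;
     MatCreateSBAIJ(PETSC_COMM_WORLD,2,8,8,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);
*/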
2079 
2080 
2081 #undef __FUNCT__
2082 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2083 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2084 {
2085   Mat            mat;
2086   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2087   PetscErrorCode ierr;
2088   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2089   PetscScalar    *array;
2090 
2091   PetscFunctionBegin;
2092   *newmat = 0;
2093 
2094   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2095   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2096   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2097   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2098   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2099   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2100 
2101   mat->factortype   = matin->factortype;
2102   mat->preallocated = PETSC_TRUE;
2103   mat->assembled    = PETSC_TRUE;
2104   mat->insertmode   = NOT_SET_VALUES;
2105 
2106   a      = (Mat_MPISBAIJ*)mat->data;
2107   a->bs2 = oldmat->bs2;
2108   a->mbs = oldmat->mbs;
2109   a->nbs = oldmat->nbs;
2110   a->Mbs = oldmat->Mbs;
2111   a->Nbs = oldmat->Nbs;
2112 
2113 
2114   a->size         = oldmat->size;
2115   a->rank         = oldmat->rank;
2116   a->donotstash   = oldmat->donotstash;
2117   a->roworiented  = oldmat->roworiented;
2118   a->rowindices   = 0;
2119   a->rowvalues    = 0;
2120   a->getrowactive = PETSC_FALSE;
2121   a->barray       = 0;
2122   a->rstartbs     = oldmat->rstartbs;
2123   a->rendbs       = oldmat->rendbs;
2124   a->cstartbs     = oldmat->cstartbs;
2125   a->cendbs       = oldmat->cendbs;
2126 
2127   /* hash table stuff */
2128   a->ht           = 0;
2129   a->hd           = 0;
2130   a->ht_size      = 0;
2131   a->ht_flag      = oldmat->ht_flag;
2132   a->ht_fact      = oldmat->ht_fact;
2133   a->ht_total_ct  = 0;
2134   a->ht_insert_ct = 0;
2135 
2136   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2137   if (oldmat->colmap) {
2138 #if defined(PETSC_USE_CTABLE)
2139     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2140 #else
2141     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2142     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2143     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2144 #endif
2145   } else a->colmap = 0;
2146 
2147   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2148     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2149     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2150     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2151   } else a->garray = 0;
2152 
2153   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2154   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2156   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2158 
2159   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2160   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2161   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2162   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2163 
2164   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2165   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2166   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2167   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2168   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2169   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2170   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2171   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2172   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2173   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2177 
2178   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2179   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2180   a->sMvctx = oldmat->sMvctx;
2181   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2182 
2183   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2184   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2185   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2186   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2187   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2188   *newmat = mat;
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatLoad_MPISBAIJ"
2194 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2195 {
2196   PetscErrorCode ierr;
2197   PetscInt       i,nz,j,rstart,rend;
2198   PetscScalar    *vals,*buf;
2199   MPI_Comm       comm;
2200   MPI_Status     status;
2201   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2202   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2203   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2204   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2205   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2206   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2207   int            fd;
2208 
2209   PetscFunctionBegin;
2210   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2211   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2212   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2213   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2214 
2215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2216   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2217   if (!rank) {
2218     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2219     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2220     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2221     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2222   }
2223 
2224   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2225 
2226   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2227   M    = header[1];
2228   N    = header[2];
2229 
2230   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2231   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2232   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2233 
2234   /* If global sizes are set, check if they are consistent with that given in the file */
2235   if (sizesset) {
2236     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2237   }
2238   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2239   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2240 
2241   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2242 
2243   /*
2244      This code adds extra rows to make sure the number of rows is
2245      divisible by the blocksize
2246   */
2247   Mbs        = M/bs;
2248   extra_rows = bs - M + bs*(Mbs);
2249   if (extra_rows == bs) extra_rows = 0;
2250   else                  Mbs++;
2251   if (extra_rows && !rank) {
2252     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2253   }
2254 
2255   /* determine ownership of all rows */
2256   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2257     mbs = Mbs/size + ((Mbs % size) > rank);
2258     m   = mbs*bs;
2259   } else { /* User Set */
2260     m   = newmat->rmap->n;
2261     mbs = m/bs;
2262   }
2263   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2264   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2265   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2266   rowners[0] = 0;
2267   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2268   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2269   rstart = rowners[rank];
2270   rend   = rowners[rank+1];
2271 
2272   /* distribute row lengths to all processors */
2273   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2274   if (!rank) {
2275     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2276     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2277     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2278     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2279     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2280     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2281     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2282   } else {
2283     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2284   }
2285 
2286   if (!rank) {   /* procs[0] */
2287     /* calculate the number of nonzeros on each processor */
2288     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2289     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2290     for (i=0; i<size; i++) {
2291       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2292         procsnz[i] += rowlengths[j];
2293       }
2294     }
2295     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2296 
2297     /* determine max buffer needed and allocate it */
2298     maxnz = 0;
2299     for (i=0; i<size; i++) {
2300       maxnz = PetscMax(maxnz,procsnz[i]);
2301     }
2302     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2303 
2304     /* read in my part of the matrix column indices  */
2305     nz     = procsnz[0];
2306     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2307     mycols = ibuf;
2308     if (size == 1) nz -= extra_rows;
2309     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2310     if (size == 1) {
2311       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2312     }
2313 
2314     /* read in the other processes' column indices (except the last one's) and ship them off */
2315     for (i=1; i<size-1; i++) {
2316       nz   = procsnz[i];
2317       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2318       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2319     }
2320     /* read in the stuff for the last proc */
2321     if (size != 1) {
2322       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2323       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2324       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2325       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2326     }
2327     ierr = PetscFree(cols);CHKERRQ(ierr);
2328   } else {  /* procs[i], i>0 */
2329     /* determine buffer space needed for message */
2330     nz = 0;
2331     for (i=0; i<m; i++) nz += locrowlens[i];
2332     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2333     mycols = ibuf;
2334     /* receive message of column indices*/
2335     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2336     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2337     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2338   }
2339 
2340   /* loop over local rows, determining number of off diagonal entries */
2341   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2342   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2343   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2344   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2345   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2346   rowcount = 0;
2347   nzcount  = 0;
2348   for (i=0; i<mbs; i++) {
2349     dcount  = 0;
2350     odcount = 0;
2351     for (j=0; j<bs; j++) {
2352       kmax = locrowlens[rowcount];
2353       for (k=0; k<kmax; k++) {
2354         tmp = mycols[nzcount++]/bs; /* block col. index */
2355         if (!mask[tmp]) {
2356           mask[tmp] = 1;
2357           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2358           else masked1[dcount++] = tmp; /* entry in diag portion */
2359         }
2360       }
2361       rowcount++;
2362     }
2363 
2364     dlens[i]  = dcount;  /* d_nzz[i] */
2365     odlens[i] = odcount; /* o_nzz[i] */
2366 
2367     /* zero out the mask elements we set */
2368     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2369     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2370   }
2371   if (!sizesset) {
2372     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2373   }
2374   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2375   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2376 
2377   if (!rank) {
2378     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2379     /* read in my part of the matrix numerical values  */
2380     nz     = procsnz[0];
2381     vals   = buf;
2382     mycols = ibuf;
2383     if (size == 1) nz -= extra_rows;
2384     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2385     if (size == 1) {
2386       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2387     }
2388 
2389     /* insert into matrix */
2390     jj = rstart*bs;
2391     for (i=0; i<m; i++) {
2392       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2393       mycols += locrowlens[i];
2394       vals   += locrowlens[i];
2395       jj++;
2396     }
2397 
2398     /* read in the other processes' values (except the last one's) and ship them out */
2399     for (i=1; i<size-1; i++) {
2400       nz   = procsnz[i];
2401       vals = buf;
2402       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2403       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2404     }
2405     /* the last proc */
2406     if (size != 1) {
2407       nz   = procsnz[i] - extra_rows;
2408       vals = buf;
2409       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2410       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2411       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2412     }
2413     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2414 
2415   } else {
2416     /* receive numeric values */
2417     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2418 
2419     /* receive message of values*/
2420     vals   = buf;
2421     mycols = ibuf;
2422     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2423     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2424     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2425 
2426     /* insert into matrix */
2427     jj = rstart*bs;
2428     for (i=0; i<m; i++) {
2429       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2430       mycols += locrowlens[i];
2431       vals   += locrowlens[i];
2432       jj++;
2433     }
2434   }
2435 
2436   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2437   ierr = PetscFree(buf);CHKERRQ(ierr);
2438   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2439   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2440   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2441   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2442   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
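
/*
   Usage sketch (illustrative, not part of this file; "matrix.dat" is a placeholder):
   loading through the routine above from a PETSc binary file; -matload_block_size
   selects the block size used while reading.

     Mat         A;
     PetscViewer fd;
     PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetType(A,MATMPISBAIJ);
     MatLoad(A,fd);
     PetscViewerDestroy(&fd);
*/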
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2449 /*XXXXX@
2450    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2451 
2452    Input Parameters:
2453 .  mat  - the matrix
2454 .  fact - factor
2455 
2456    Not Collective on Mat, each process can have a different hash factor
2457 
2458    Level: advanced
2459 
2460   Notes:
2461    This can also be set by the command line option: -mat_use_hash_table fact
2462 
2463 .keywords: matrix, hashtable, factor, HT
2464 
2465 .seealso: MatSetOption()
2466 @XXXXX*/
2467 
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2471 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2472 {
2473   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2474   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2475   PetscReal      atmp;
2476   PetscReal      *work,*svalues,*rvalues;
2477   PetscErrorCode ierr;
2478   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2479   PetscMPIInt    rank,size;
2480   PetscInt       *rowners_bs,dest,count,source;
2481   PetscScalar    *va;
2482   MatScalar      *ba;
2483   MPI_Status     stat;
2484 
2485   PetscFunctionBegin;
2486   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2487   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2488   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2489 
2490   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2491   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2492 
2493   bs  = A->rmap->bs;
2494   mbs = a->mbs;
2495   Mbs = a->Mbs;
2496   ba  = b->a;
2497   bi  = b->i;
2498   bj  = b->j;
2499 
2500   /* find ownerships */
2501   rowners_bs = A->rmap->range;
2502 
2503   /* each proc creates an array to be distributed */
2504   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2505   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2506 
2507   /* row_max for B */
2508   if (rank != size-1) {
2509     for (i=0; i<mbs; i++) {
2510       ncols = bi[1] - bi[0]; bi++;
2511       brow  = bs*i;
2512       for (j=0; j<ncols; j++) {
2513         bcol = bs*(*bj);
2514         for (kcol=0; kcol<bs; kcol++) {
2515           col  = bcol + kcol;                /* local col index */
2516           col += rowners_bs[rank+1];      /* global col index */
2517           for (krow=0; krow<bs; krow++) {
2518             atmp = PetscAbsScalar(*ba); ba++;
2519             row  = brow + krow;   /* local row index */
2520             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2521             if (work[col] < atmp) work[col] = atmp;
2522           }
2523         }
2524         bj++;
2525       }
2526     }
2527 
2528     /* send values to its owners */
2529     for (dest=rank+1; dest<size; dest++) {
2530       svalues = work + rowners_bs[dest];
2531       count   = rowners_bs[dest+1]-rowners_bs[dest];
2532       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2533     }
2534   }
2535 
2536   /* receive values */
2537   if (rank) {
2538     rvalues = work;
2539     count   = rowners_bs[rank+1]-rowners_bs[rank];
2540     for (source=0; source<rank; source++) {
2541       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2542       /* process values */
2543       for (i=0; i<count; i++) {
2544         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2545       }
2546     }
2547   }
2548 
2549   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2550   ierr = PetscFree(work);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
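
/*
   Usage sketch (illustrative, not part of this file): row-wise maximum absolute values
   of an assembled MPISBAIJ matrix A; the idx argument is not supported by the
   implementation above and must be NULL.

     Vec rmax;
     MatCreateVecs(A,NULL,&rmax);
     MatGetRowMaxAbs(A,rmax,NULL);
     VecDestroy(&rmax);
*/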
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatSOR_MPISBAIJ"
2556 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2557 {
2558   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2559   PetscErrorCode    ierr;
2560   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2561   PetscScalar       *x,*ptr,*from;
2562   Vec               bb1;
2563   const PetscScalar *b;
2564 
2565   PetscFunctionBegin;
2566   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2567   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2568 
2569   if (flag == SOR_APPLY_UPPER) {
2570     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2571     PetscFunctionReturn(0);
2572   }
2573 
2574   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2575     if (flag & SOR_ZERO_INITIAL_GUESS) {
2576       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2577       its--;
2578     }
2579 
2580     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
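    /*
       Each iteration assembles the full right-hand side bb - (off-diagonal part of A)*xx before the
       purely local sweep: slvec0 = [xx ; B^T*xx] is scaled by -1 and scattered with ADD_VALUES into
       slvec1 = [bb ; 0], so slvec1a picks up the lower triangular contributions and slvec1b receives
       the needed ghost values of -xx; the mult-add with B then folds in the upper triangular part.
    */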
2581     while (its--) {
2582 
2583       /* lower triangular part: slvec0b = - B^T*xx */
2584       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2585 
2586       /* copy xx into slvec0a */
2587       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2588       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2589       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2590       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2591 
2592       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2593 
2594       /* copy bb into slvec1a */
2595       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2596       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2597       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2598       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2599 
2600       /* set slvec1b = 0 */
2601       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2602 
2603       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2604       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2605       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2606       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2607 
2608       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - (off-diagonal part of A)*x */
2609       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2610 
2611       /* local diagonal sweep */
2612       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2613     }
2614     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2615   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2617   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2618     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2619   } else if (flag & SOR_EISENSTAT) {
2620     Vec               xx1;
2621     PetscBool         hasop;
2622     const PetscScalar *diag;
2623     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2624     PetscInt          i,n;
2625 
2626     if (!mat->xx1) {
2627       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2628       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2629     }
2630     xx1 = mat->xx1;
2631     bb1 = mat->bb1;
2632 
2633     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2634 
2635     if (!mat->diag) {
2636       /* this is wrong for same matrix with new nonzero values */
2637       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2638       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2639     }
2640     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2641 
2642     if (hasop) {
2643       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
2644       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2645     } else {
2646       /*
2647           These two lines are replaced by code that may be a bit faster for a good compiler
2648       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2649       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2650       */
2651       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2652       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2653       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2654       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2655       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2656       if (omega == 1.0) {
2657         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2658         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2659       } else {
2660         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2661         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2662       }
2663       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2664       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2665       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2666       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2667     }
2668 
2669     /* multiply off-diagonal portion of matrix */
2670     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2671     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2672     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2673     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2674     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2675     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2676     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2677     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2678     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2679     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2680 
2681     /* local sweep */
2682     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2683     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2684   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2690 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2691 {
2692   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2693   PetscErrorCode ierr;
2694   Vec            lvec1,bb1;
2695 
2696   PetscFunctionBegin;
2697   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2698   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2699 
2700   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2701     if (flag & SOR_ZERO_INITIAL_GUESS) {
2702       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2703       its--;
2704     }
2705 
2706     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2707     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2708     while (its--) {
2709       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2710 
2711       /* lower triangular part: bb1 = bb - B^T*xx */
2712       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2713       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2714 
2715       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2716       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2717       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2718 
2719       /* upper triangular part: bb1 = bb1 - B*x */
2720       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2721       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2722 
2723       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2724 
2725       /* diagonal sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2727     }
2728     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2729     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2730   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2736 /*@
2737      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2738          rows in standard CSR format.
2739 
2740    Collective on MPI_Comm
2741 
2742    Input Parameters:
2743 +  comm - MPI communicator
2744 .  bs - the block size, only a block size of 1 is supported
2745 .  m - number of local rows (cannot be PETSC_DECIDE)
2746 .  n - This value should be the same as the local size used in creating the
2747        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2748        it calculated if N is given). For square matrices n is almost always m.
2749 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2750 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2751 .   i - row indices
2752 .   j - column indices
2753 -   a - matrix values
2754 
2755    Output Parameter:
2756 .   mat - the matrix
2757 
2758    Level: intermediate
2759 
2760    Notes:
2761        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2762      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2763      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2764 
2765        The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
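
       Example usage (a minimal sketch assuming bs = 1; row 0 keeps its diagonal entry and the
     entry in column rstart+1, row 1 keeps only its diagonal entry, so each process passes only
     the upper triangular part of its local 2x2 block):
.vb
       PetscInt    m = 2, rstart, i[3], j[3];
       PetscScalar a[3];
       Mat         A;

       MPI_Scan(&m,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD); rstart -= m;
       i[0] = 0; i[1] = 2; i[2] = 3;
       j[0] = rstart; j[1] = rstart+1; j[2] = rstart+1;
       a[0] = 2.0;    a[1] = -1.0;     a[2] = 2.0;
       MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,m,m,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve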
2766 
2767 .keywords: matrix, aij, compressed row, sparse, parallel
2768 
2769 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2770           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2771 @*/
2772 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2773 {
2774   PetscErrorCode ierr;
2775 
2776 
2777   PetscFunctionBegin;
2778   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2779   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2780   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2781   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2782   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2783   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2790 /*@C
2791    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format,
2792    given the local rows in compressed row (CSR) format.
2793 
2794    Collective on MPI_Comm
2795 
2796    Input Parameters:
2797 +  B - the matrix
2798 .  bs - the block size
2799 .  i - the indices into j for the start of each local row (starts with zero)
2800 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2801 -  v - optional values in the matrix
2802 
2803    Level: developer
2804 
2805 .keywords: matrix, aij, compressed row, sparse, parallel
2806 
2807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2808 @*/
2809 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2810 {
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatCreateMPISBAIJConcatenateSeqSBAIJSymbolic"
2820 PetscErrorCode MatCreateMPISBAIJConcatenateSeqSBAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
2821 {
2822   PetscErrorCode ierr;
2823   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2824   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
2825   PetscInt       *indx,*bindx,rmax=a->rmax,j;
2826 
2827   PetscFunctionBegin;
2828   /* This routine will ONLY return a matrix of type MPISBAIJ */
2829   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2830   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2831   m = m/bs; N = N/cbs;
2832   if (n == PETSC_DECIDE) {
2833     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
2834   }
2835   /* Check sum(n) = N */
2836   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2837   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
2838 
2839   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2840   rstart -= m;
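  /*
     Count the block nonzeros of each local block row of inmat (converting the scalar column indices
     returned by MatGetRow_SeqSBAIJ() into block column indices) so that the parallel matrix can be
     preallocated with MatMPISBAIJSetPreallocation() below.
  */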
2841 
2842   ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2843   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2844   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2845   for (i=0; i<m; i++) {
2846     ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2847     nnz = nnz/bs;
2848     for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2849     ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2850     ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2851   }
2852   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2853   ierr = PetscFree(bindx);CHKERRQ(ierr);
2854 
2855   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2856   ierr = MatSetSizes(*outmat,m*bs,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2857   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2858   ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2859   ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2860   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatCreateMPISBAIJConcatenateSeqSBAIJNumeric"
2866 PetscErrorCode MatCreateMPISBAIJConcatenateSeqSBAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
2867 {
2868   PetscErrorCode ierr;
2869   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2870   PetscInt       *indx;
2871   PetscScalar    *values;
2872 
2873   PetscFunctionBegin;
2874   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2875   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2876   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
2877 
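  /* copy each local row of inmat into the globally shifted row i + rstart of outmat */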
2878   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2879   for (i=0; i<m; i++) {
2880     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2881     Ii   = i + rstart;
2882     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2883     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2884   }
2885   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2886   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2887   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2888   PetscFunctionReturn(0);
2889 }
2890 
2891 #undef __FUNCT__
2892 #define __FUNCT__ "MatCreateMPISBAIJConcatenateSeqSBAIJ"
2893 /*@
2894       MatCreateMPISBAIJConcatenateSeqSBAIJ - Creates a single large PETSc matrix by concatenating sequential
2895                  matrices from each processor
2896 
2897     Collective on MPI_Comm
2898 
2899    Input Parameters:
2900 +    comm - the communicator the parallel matrix will live on
2901 .    inmat - the input sequential matrix (one per process)
2902 .    n - number of local columns (or PETSC_DECIDE)
2903 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2904 
2905    Output Parameter:
2906 .    outmat - the parallel matrix generated
2907 
2908     Level: advanced
2909 
2910    Notes: The number of columns of the matrix on EACH process MUST be the same.
2911 
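   Example usage (a minimal sketch; seqA is assumed to be a MATSEQSBAIJ matrix assembled by each
   process on PETSC_COMM_SELF):
.vb
      Mat A;
      MatCreateMPISBAIJConcatenateSeqSBAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&A);
.ve
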
2912 @*/
2913 PetscErrorCode MatCreateMPISBAIJConcatenateSeqSBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2914 {
2915   PetscErrorCode ierr;
2916   PetscMPIInt    size;
2917 
2918   PetscFunctionBegin;
2919   /* same as MatCreateMPIAIJConcatenateSeqAIJ() */
2920   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2921   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
2922   if (size == 1) {
2923     if (scall == MAT_INITIAL_MATRIX) {
2924       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
2925     } else {
2926       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
2927     }
2928   } else {
2929     if (scall == MAT_INITIAL_MATRIX) {
2930       ierr = MatCreateMPISBAIJConcatenateSeqSBAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
2931     }
2932     ierr = MatCreateMPISBAIJConcatenateSeqSBAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
2933   }
2934   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatGetRedundantMatrix_MPISBAIJ"
2940 PetscErrorCode MatGetRedundantMatrix_MPISBAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2941 {
2942   PetscErrorCode ierr;
2943   MPI_Comm       comm;
2944   PetscMPIInt    size,subsize;
2945   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2946   Mat_Redundant  *redund=NULL;
2947   PetscSubcomm   psubcomm=NULL;
2948   MPI_Comm       subcomm_in=subcomm;
2949   Mat            *matseq;
2950   IS             isrow,iscol;
2951 
2952   PetscFunctionBegin;
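  /*
     Build nsubcomm redundant copies of mat: each process extracts the rows it will own in the
     redundant matrix (with all columns) as a sequential matrix via MatGetSubMatrices(), and those
     pieces are concatenated into a parallel SBAIJ matrix on the subcommunicator, so that every
     subcommunicator ends up holding a full copy of mat.
  */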
2953   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2954     if (reuse ==  MAT_INITIAL_MATRIX) {
2955       /* create psubcomm, then get subcomm */
2956       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2957       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2958       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2959 
2960       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2961       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2962       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2963       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2964       subcomm = psubcomm->comm;
2965     } else { /* retrieve psubcomm and subcomm */
2966       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2967       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2968       redund   = (*matredundant)->redundant;
2969       psubcomm = redund->psubcomm;
2970     }
2971     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2972       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"not supported");
2973     }
2974   }
2975 
2976   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2977   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2978   if (reuse == MAT_INITIAL_MATRIX) {
2979     /* create a local sequential matrix matseq[0] */
2980     mloc_sub = PETSC_DECIDE;
2981     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2982     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2983     rstart = rend - mloc_sub;
2984     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2985     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2986   } else { /* reuse == MAT_REUSE_MATRIX */
2987     redund = (*matredundant)->redundant;
2988     isrow  = redund->isrow;
2989     iscol  = redund->iscol;
2990     matseq = redund->matseq;
2991   }
2992   ierr = MatGetSubMatrices_MPISBAIJ(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2993   ierr = MatCreateMPISBAIJConcatenateSeqSBAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2994 
2995   if (reuse == MAT_INITIAL_MATRIX) {
2996     /* create a supporting struct and attach it to *matredundant for reuse */
2997     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2998     (*matredundant)->redundant = redund;
2999     redund->isrow              = isrow;
3000     redund->iscol              = iscol;
3001     redund->matseq             = matseq;
3002     redund->psubcomm           = psubcomm;
3003   }
3004   PetscFunctionReturn(0);
3005 }
3006