xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a593226baf5c94d3e4393e03cae3fa59cb94e469)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j input by user is ingored.
130 */
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some Variables required in the macro */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (!baij->donotstash) {
165     if (n > baij->n_loc) {
166       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
167       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
168       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
170 
171       baij->n_loc = n;
172     }
173     in_loc = baij->in_loc;
174     v_loc  = baij->v_loc;
175   }
176 
177   for (i=0; i<m; i++) {
178     if (im[i] < 0) continue;
179 #if defined(PETSC_USE_DEBUG)
180     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
181 #endif
182     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
183       row = im[i] - rstart_orig;              /* local row index */
184       for (j=0; j<n; j++) {
185         if (im[i]/bs > in[j]/bs) {
186           if (a->ignore_ltriangular) {
187             continue;    /* ignore lower triangular blocks */
188           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
189         }
190         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
191           col  = in[j] - cstart_orig;         /* local col index */
192           brow = row/bs; bcol = col/bs;
193           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
194           if (roworiented) value = v[i*n+j];
195           else             value = v[i+j*m];
196           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
197           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
198         } else if (in[j] < 0) continue;
199 #if defined(PETSC_USE_DEBUG)
200         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
201 #endif
202         else {  /* off-diag entry (B) */
203           if (mat->was_assembled) {
204             if (!baij->colmap) {
205               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
206             }
207 #if defined(PETSC_USE_CTABLE)
208             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
209             col  = col - 1;
210 #else
211             col = baij->colmap[in[j]/bs] - 1;
212 #endif
213             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
214               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
215               col  =  in[j];
216               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
217               B    = baij->B;
218               b    = (Mat_SeqBAIJ*)(B)->data;
219               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
220               ba   = b->a;
221             } else col += in[j]%bs;
222           } else col = in[j];
223           if (roworiented) value = v[i*n+j];
224           else             value = v[i+j*m];
225           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
226           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
227         }
228       }
229     } else {  /* off processor entry */
230       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
231       if (!baij->donotstash) {
232         mat->assembled = PETSC_FALSE;
233         n_loc          = 0;
234         for (j=0; j<n; j++) {
235           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
236           in_loc[n_loc] = in[j];
237           if (roworiented) {
238             v_loc[n_loc] = v[i*n+j];
239           } else {
240             v_loc[n_loc] = v[j*m+i];
241           }
242           n_loc++;
243         }
244         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
245       }
246     }
247   }
248   PetscFunctionReturn(0);
249 }
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
253 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
254 {
255   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
256   const MatScalar *value;
257   MatScalar       *barray     =baij->barray;
258   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
259   PetscErrorCode  ierr;
260   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
261   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
262   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
263 
264   PetscFunctionBegin;
265   if (!barray) {
266     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
267     baij->barray = barray;
268   }
269 
270   if (roworiented) {
271     stepval = (n-1)*bs;
272   } else {
273     stepval = (m-1)*bs;
274   }
275   for (i=0; i<m; i++) {
276     if (im[i] < 0) continue;
277 #if defined(PETSC_USE_DEBUG)
278     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
279 #endif
280     if (im[i] >= rstart && im[i] < rend) {
281       row = im[i] - rstart;
282       for (j=0; j<n; j++) {
283         if (im[i] > in[j]) {
284           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
285           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
286         }
287         /* If NumCol = 1 then a copy is not required */
288         if ((roworiented) && (n == 1)) {
289           barray = (MatScalar*) v + i*bs2;
290         } else if ((!roworiented) && (m == 1)) {
291           barray = (MatScalar*) v + j*bs2;
292         } else { /* Here a copy is required */
293           if (roworiented) {
294             value = v + i*(stepval+bs)*bs + j*bs;
295           } else {
296             value = v + j*(stepval+bs)*bs + i*bs;
297           }
298           for (ii=0; ii<bs; ii++,value+=stepval) {
299             for (jj=0; jj<bs; jj++) {
300               *barray++ = *value++;
301             }
302           }
303           barray -=bs2;
304         }
305 
306         if (in[j] >= cstart && in[j] < cend) {
307           col  = in[j] - cstart;
308           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
309         } else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined(PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined(PETSC_USE_CTABLE)
330             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col  = in[j];
338             }
339           } else col = in[j];
340           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
341         }
342       }
343     } else {
344       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend) {
376           col  = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined(PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data--;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
396   }
397   PetscFunctionReturn(0);
398 }
399 
400 #undef __FUNCT__
401 #define __FUNCT__ "MatNorm_MPISBAIJ"
402 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
403 {
404   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
405   PetscErrorCode ierr;
406   PetscReal      sum[2],*lnorm2;
407 
408   PetscFunctionBegin;
409   if (baij->size == 1) {
410     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
411   } else {
412     if (type == NORM_FROBENIUS) {
413       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
414       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
415       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
416       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
418       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
419       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
420       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
421     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
422       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
423       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
424       PetscReal    *rsum,*rsum2,vabs;
425       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
426       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
427       MatScalar    *v;
428 
429       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
430       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
431       /* Amat */
432       v = amat->a; jj = amat->j;
433       for (brow=0; brow<mbs; brow++) {
434         grow = bs*(rstart + brow);
435         nz   = amat->i[brow+1] - amat->i[brow];
436         for (bcol=0; bcol<nz; bcol++) {
437           gcol = bs*(rstart + *jj); jj++;
438           for (col=0; col<bs; col++) {
439             for (row=0; row<bs; row++) {
440               vabs            = PetscAbsScalar(*v); v++;
441               rsum[gcol+col] += vabs;
442               /* non-diagonal block */
443               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       /* Bmat */
449       v = bmat->a; jj = bmat->j;
450       for (brow=0; brow<mbs; brow++) {
451         grow = bs*(rstart + brow);
452         nz = bmat->i[brow+1] - bmat->i[brow];
453         for (bcol=0; bcol<nz; bcol++) {
454           gcol = bs*garray[*jj]; jj++;
455           for (col=0; col<bs; col++) {
456             for (row=0; row<bs; row++) {
457               vabs            = PetscAbsScalar(*v); v++;
458               rsum[gcol+col] += vabs;
459               rsum[grow+row] += vabs;
460             }
461           }
462         }
463       }
464       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
465       *norm = 0.0;
466       for (col=0; col<mat->cmap->N; col++) {
467         if (rsum2[col] > *norm) *norm = rsum2[col];
468       }
469       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
470     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
471   }
472   PetscFunctionReturn(0);
473 }
474 
475 #undef __FUNCT__
476 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
477 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
478 {
479   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
480   PetscErrorCode ierr;
481   PetscInt       nstash,reallocs;
482   InsertMode     addv;
483 
484   PetscFunctionBegin;
485   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
486 
487   /* make sure all processors are either in INSERTMODE or ADDMODE */
488   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
489   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
490   mat->insertmode = addv; /* in case this processor had no cache */
491 
492   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
493   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
494   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
495   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
496   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
497   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
498   PetscFunctionReturn(0);
499 }
500 
501 #undef __FUNCT__
502 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
503 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
504 {
505   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
506   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
507   PetscErrorCode ierr;
508   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
509   PetscInt       *row,*col;
510   PetscBool      other_disassembled;
511   PetscMPIInt    n;
512   PetscBool      r1,r2,r3;
513   MatScalar      *val;
514   InsertMode     addv = mat->insertmode;
515 
516   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
517   PetscFunctionBegin;
518   if (!baij->donotstash &&  !mat->nooffprocentries) {
519     while (1) {
520       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
521       if (!flg) break;
522 
523       for (i=0; i<n;) {
524         /* Now identify the consecutive vals belonging to the same row */
525         for (j=i,rstart=row[j]; j<n; j++) {
526           if (row[j] != rstart) break;
527         }
528         if (j < n) ncols = j-i;
529         else       ncols = n-i;
530         /* Now assemble all these values with a single function call */
531         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
532         i    = j;
533       }
534     }
535     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
536     /* Now process the block-stash. Since the values are stashed column-oriented,
537        set the roworiented flag to column oriented, and after MatSetValues()
538        restore the original flags */
539     r1 = baij->roworiented;
540     r2 = a->roworiented;
541     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
542 
543     baij->roworiented = PETSC_FALSE;
544     a->roworiented    = PETSC_FALSE;
545 
546     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
547     while (1) {
548       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
549       if (!flg) break;
550 
551       for (i=0; i<n;) {
552         /* Now identify the consecutive vals belonging to the same row */
553         for (j=i,rstart=row[j]; j<n; j++) {
554           if (row[j] != rstart) break;
555         }
556         if (j < n) ncols = j-i;
557         else       ncols = n-i;
558         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
559         i    = j;
560       }
561     }
562     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
563 
564     baij->roworiented = r1;
565     a->roworiented    = r2;
566 
567     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
568   }
569 
570   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
571   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
572 
573   /* determine if any processor has disassembled, if so we must
574      also disassemble ourselfs, in order that we may reassemble. */
575   /*
576      if nonzero structure of submatrix B cannot change then we know that
577      no processor disassembled thus we can skip this stuff
578   */
579   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
580     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
581     if (mat->was_assembled && !other_disassembled) {
582       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
583     }
584   }
585 
586   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
587     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
588   }
589   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
590   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
591 
592   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
593 
594   baij->rowvalues = 0;
595 
596   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
597   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
598     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
599     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #include <petscdraw.h>
607 #undef __FUNCT__
608 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
609 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
612   PetscErrorCode    ierr;
613   PetscInt          bs   = mat->rmap->bs;
614   PetscMPIInt       rank = baij->rank;
615   PetscBool         iascii,isdraw;
616   PetscViewer       sviewer;
617   PetscViewerFormat format;
618 
619   PetscFunctionBegin;
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
621   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
622   if (iascii) {
623     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
624     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
625       MatInfo info;
626       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
634       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
635       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
636       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
637       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_INFO) {
640       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
643       PetscFunctionReturn(0);
644     }
645   }
646 
647   if (isdraw) {
648     PetscDraw draw;
649     PetscBool isnull;
650     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
651     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
652   }
653 
654   {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
676     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++;
687           a += bs;
688         }
689       }
690     }
691     /* copy over the B part */
692     Bloc = (Mat_SeqBAIJ*)baij->B->data;
693     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
694     for (i=0; i<mbs; i++) {
695 
696       rvals[0] = bs*(baij->rstartbs + i);
697       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
698       for (j=ai[i]; j<ai[i+1]; j++) {
699         col = baij->garray[aj[j]]*bs;
700         for (k=0; k<bs; k++) {
701           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
702           col++;
703           a += bs;
704         }
705       }
706     }
707     ierr = PetscFree(rvals);CHKERRQ(ierr);
708     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     /*
711        Everyone has to call to draw the matrix since the graphics waits are
712        synchronized across all processors that share the PetscDraw object
713     */
714     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     if (!rank) {
716         ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
717     }
718     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
719     ierr = MatDestroy(&A);CHKERRQ(ierr);
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 #undef __FUNCT__
725 #define __FUNCT__ "MatView_MPISBAIJ"
726 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
727 {
728   PetscErrorCode ierr;
729   PetscBool      iascii,isdraw,issocket,isbinary;
730 
731   PetscFunctionBegin;
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
736   if (iascii || isdraw || issocket || isbinary) {
737     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPISBAIJ"
744 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
752 #endif
753   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
754   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
755   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
756   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
757 #if defined(PETSC_USE_CTABLE)
758   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
759 #else
760   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
761 #endif
762   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
763   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
764   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
771   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
772   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
773   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
776   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
777 #if defined(PETSC_USE_REAL_MAT_SINGLE)
778   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
779 #endif
780   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
781   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
783   ierr = PetscFree(mat->data);CHKERRQ(ierr);
784 
785   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
797 {
798   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
801   PetscScalar    *x,*from;
802 
803   PetscFunctionBegin;
804   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
805   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
806 
807   /* diagonal part */
808   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
809   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
810 
811   /* subdiagonal part */
812   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
813 
814   /* copy x into the vec slvec0 */
815   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
816   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
817 
818   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
819   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   /* supperdiagonal part */
825   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMult_MPISBAIJ"
831 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
832 {
833   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
834   PetscErrorCode ierr;
835   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
836   PetscScalar    *x,*from;
837 
838   PetscFunctionBegin;
839   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
840   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
841 
842   /* diagonal part */
843   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
844   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
845 
846   /* subdiagonal part */
847   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
848 
849   /* copy x into the vec slvec0 */
850   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
851   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
852 
853   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
854   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859   /* supperdiagonal part */
860   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
866 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
867 {
868   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
869   PetscErrorCode ierr;
870   PetscInt       nt;
871 
872   PetscFunctionBegin;
873   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
874   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
875 
876   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
877   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
878 
879   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   /* do diagonal part */
881   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
882   /* do supperdiagonal part */
883   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
885   /* do subdiagonal part */
886   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
887   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
888   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 #undef __FUNCT__
893 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
894 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
895 {
896   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
897   PetscErrorCode ierr;
898   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
899   PetscScalar    *x,*from,zero=0.0;
900 
901   PetscFunctionBegin;
902   /*
903   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
904   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
905   */
906   /* diagonal part */
907   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
908   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
909 
910   /* subdiagonal part */
911   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
912 
913   /* copy x into the vec slvec0 */
914   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
915   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
916   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
917   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
918 
919   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
920   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
921   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922 
923   /* supperdiagonal part */
924   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
930 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
931 {
932   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937   /* do diagonal part */
938   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
939   /* do supperdiagonal part */
940   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
942 
943   /* do subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
945   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
946   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
947   PetscFunctionReturn(0);
948 }
949 
950 /*
951   This only works correctly for square matrices where the subblock A->A is the
952    diagonal block
953 */
954 #undef __FUNCT__
955 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
956 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
957 {
958   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
963   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatScale_MPISBAIJ"
969 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
970 {
971   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
972   PetscErrorCode ierr;
973 
974   PetscFunctionBegin;
975   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
976   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatGetRow_MPISBAIJ"
982 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983 {
984   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
985   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
986   PetscErrorCode ierr;
987   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
988   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
989   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
990 
991   PetscFunctionBegin;
992   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
993   mat->getrowactive = PETSC_TRUE;
994 
995   if (!mat->rowvalues && (idx || v)) {
996     /*
997         allocate enough space to hold information from the longest row.
998     */
999     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1000     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1001     PetscInt     max = 1,mbs = mat->mbs,tmp;
1002     for (i=0; i<mbs; i++) {
1003       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1004       if (max < tmp) max = tmp;
1005     }
1006     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1007   }
1008 
1009   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1010   lrow = row - brstart;  /* local row index */
1011 
1012   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1013   if (!v)   {pvA = 0; pvB = 0;}
1014   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1015   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1016   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1017   nztot = nzA + nzB;
1018 
1019   cmap = mat->garray;
1020   if (v  || idx) {
1021     if (nztot) {
1022       /* Sort by increasing column numbers, assuming A and B already sorted */
1023       PetscInt imark = -1;
1024       if (v) {
1025         *v = v_p = mat->rowvalues;
1026         for (i=0; i<nzB; i++) {
1027           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1028           else break;
1029         }
1030         imark = i;
1031         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1032         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1033       }
1034       if (idx) {
1035         *idx = idx_p = mat->rowindices;
1036         if (imark > -1) {
1037           for (i=0; i<imark; i++) {
1038             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1039           }
1040         } else {
1041           for (i=0; i<nzB; i++) {
1042             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043             else break;
1044           }
1045           imark = i;
1046         }
1047         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1048         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1049       }
1050     } else {
1051       if (idx) *idx = 0;
1052       if (v)   *v   = 0;
1053     }
1054   }
1055   *nz  = nztot;
1056   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1057   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1063 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1064 {
1065   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1066 
1067   PetscFunctionBegin;
1068   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1069   baij->getrowactive = PETSC_FALSE;
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1075 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1076 {
1077   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1078   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1079 
1080   PetscFunctionBegin;
1081   aA->getrow_utriangular = PETSC_TRUE;
1082   PetscFunctionReturn(0);
1083 }
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1086 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1089   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1090 
1091   PetscFunctionBegin;
1092   aA->getrow_utriangular = PETSC_FALSE;
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1098 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1099 {
1100   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1105   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1111 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1112 {
1113   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1118   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1124 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1125 {
1126   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1131   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1137 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1138 {
1139   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1140   Mat            A  = a->A,B = a->B;
1141   PetscErrorCode ierr;
1142   PetscReal      isend[5],irecv[5];
1143 
1144   PetscFunctionBegin;
1145   info->block_size = (PetscReal)matin->rmap->bs;
1146 
1147   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1148 
1149   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1150   isend[3] = info->memory;  isend[4] = info->mallocs;
1151 
1152   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1153 
1154   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1155   isend[3] += info->memory;  isend[4] += info->mallocs;
1156   if (flag == MAT_LOCAL) {
1157     info->nz_used      = isend[0];
1158     info->nz_allocated = isend[1];
1159     info->nz_unneeded  = isend[2];
1160     info->memory       = isend[3];
1161     info->mallocs      = isend[4];
1162   } else if (flag == MAT_GLOBAL_MAX) {
1163     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1164 
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   } else if (flag == MAT_GLOBAL_SUM) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1172 
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1179   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1180   info->fill_ratio_needed = 0;
1181   info->factor_mallocs    = 0;
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1187 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1188 {
1189   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1190   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   switch (op) {
1195   case MAT_NEW_NONZERO_LOCATIONS:
1196   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1197   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1198   case MAT_KEEP_NONZERO_PATTERN:
1199   case MAT_NEW_NONZERO_LOCATION_ERR:
1200     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1201     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1202     break;
1203   case MAT_ROW_ORIENTED:
1204     a->roworiented = flg;
1205 
1206     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1208     break;
1209   case MAT_NEW_DIAGONALS:
1210     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1211     break;
1212   case MAT_IGNORE_OFF_PROC_ENTRIES:
1213     a->donotstash = flg;
1214     break;
1215   case MAT_USE_HASH_TABLE:
1216     a->ht_flag = flg;
1217     break;
1218   case MAT_HERMITIAN:
1219     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1220     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1221 
1222     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1223     break;
1224   case MAT_SPD:
1225     A->spd_set = PETSC_TRUE;
1226     A->spd     = flg;
1227     if (flg) {
1228       A->symmetric                  = PETSC_TRUE;
1229       A->structurally_symmetric     = PETSC_TRUE;
1230       A->symmetric_set              = PETSC_TRUE;
1231       A->structurally_symmetric_set = PETSC_TRUE;
1232     }
1233     break;
1234   case MAT_SYMMETRIC:
1235     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1236     break;
1237   case MAT_STRUCTURALLY_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1239     break;
1240   case MAT_SYMMETRY_ETERNAL:
1241     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1242     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1243     break;
1244   case MAT_IGNORE_LOWER_TRIANGULAR:
1245     aA->ignore_ltriangular = flg;
1246     break;
1247   case MAT_ERROR_LOWER_TRIANGULAR:
1248     aA->ignore_ltriangular = flg;
1249     break;
1250   case MAT_GETROW_UPPERTRIANGULAR:
1251     aA->getrow_utriangular = flg;
1252     break;
1253   default:
1254     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1261 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1262 {
1263   PetscErrorCode ierr;
1264 
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a     = baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr) {
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1390     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1391     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1392     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1393     xb   = (Mat_SeqBAIJ*)xx->B->data;
1394     yb   = (Mat_SeqBAIJ*)yy->B->data;
1395     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1396     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1397     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1398   } else {
1399     Mat      B;
1400     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1401     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1402     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1403     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1404     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1405     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1406     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1407     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1408     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1409     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1410     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1411     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1412     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1413     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1414     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1415     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1416     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1417     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1418     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1419     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1427 {
1428   PetscErrorCode ierr;
1429   PetscInt       i;
1430   PetscBool      flg;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1434   for (i=0; i<n; i++) {
1435     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1436     if (!flg) { /* *B[i] is non-symmetric, set flag */
1437       ierr = MatSetOption(*B[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1438     }
1439   }
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 /* -------------------------------------------------------------------*/
1444 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1445                                        MatGetRow_MPISBAIJ,
1446                                        MatRestoreRow_MPISBAIJ,
1447                                        MatMult_MPISBAIJ,
1448                                /*  4*/ MatMultAdd_MPISBAIJ,
1449                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1450                                        MatMultAdd_MPISBAIJ,
1451                                        0,
1452                                        0,
1453                                        0,
1454                                /* 10*/ 0,
1455                                        0,
1456                                        0,
1457                                        MatSOR_MPISBAIJ,
1458                                        MatTranspose_MPISBAIJ,
1459                                /* 15*/ MatGetInfo_MPISBAIJ,
1460                                        MatEqual_MPISBAIJ,
1461                                        MatGetDiagonal_MPISBAIJ,
1462                                        MatDiagonalScale_MPISBAIJ,
1463                                        MatNorm_MPISBAIJ,
1464                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1465                                        MatAssemblyEnd_MPISBAIJ,
1466                                        MatSetOption_MPISBAIJ,
1467                                        MatZeroEntries_MPISBAIJ,
1468                                /* 24*/ 0,
1469                                        0,
1470                                        0,
1471                                        0,
1472                                        0,
1473                                /* 29*/ MatSetUp_MPISBAIJ,
1474                                        0,
1475                                        0,
1476                                        0,
1477                                        0,
1478                                /* 34*/ MatDuplicate_MPISBAIJ,
1479                                        0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                /* 39*/ MatAXPY_MPISBAIJ,
1484                                        MatGetSubMatrices_MPISBAIJ,
1485                                        MatIncreaseOverlap_MPISBAIJ,
1486                                        MatGetValues_MPISBAIJ,
1487                                        MatCopy_MPISBAIJ,
1488                                /* 44*/ 0,
1489                                        MatScale_MPISBAIJ,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                /* 49*/ 0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                /* 54*/ 0,
1499                                        0,
1500                                        MatSetUnfactored_MPISBAIJ,
1501                                        0,
1502                                        MatSetValuesBlocked_MPISBAIJ,
1503                                /* 59*/ 0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                /* 64*/ 0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                /* 74*/ 0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                /* 79*/ 0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                        MatLoad_MPISBAIJ,
1528                                /* 84*/ 0,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                /* 89*/ 0,
1534                                        0,
1535                                        0,
1536                                        0,
1537                                        0,
1538                                /* 94*/ 0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                /* 99*/ 0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                /*104*/ 0,
1549                                        MatRealPart_MPISBAIJ,
1550                                        MatImaginaryPart_MPISBAIJ,
1551                                        MatGetRowUpperTriangular_MPISBAIJ,
1552                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1553                                /*109*/ 0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                /*114*/ 0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                /*119*/ 0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                /*124*/ 0,
1569                                        0,
1570                                        0,
1571                                        0,
1572                                        0,
1573                                /*129*/ 0,
1574                                        0,
1575                                        0,
1576                                        0,
1577                                        0,
1578                                /*134*/ 0,
1579                                        0,
1580                                        0,
1581                                        0,
1582                                        0,
1583                                /*139*/ 0,
1584                                        0,
1585                                        0,
1586                                        0,
1587                                        0,
1588                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1589 };
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1593 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1594 {
1595   PetscFunctionBegin;
1596   *a = ((Mat_MPISBAIJ*)A->data)->A;
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1602 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1603 {
1604   Mat_MPISBAIJ   *b;
1605   PetscErrorCode ierr;
1606   PetscInt       i,mbs,Mbs;
1607 
1608   PetscFunctionBegin;
1609   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1610   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1611   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1612   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1613 
1614   b   = (Mat_MPISBAIJ*)B->data;
1615   mbs = B->rmap->n/bs;
1616   Mbs = B->rmap->N/bs;
1617   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1618 
1619   B->rmap->bs = bs;
1620   b->bs2      = bs*bs;
1621   b->mbs      = mbs;
1622   b->Mbs      = Mbs;
1623   b->nbs      = B->cmap->n/bs;
1624   b->Nbs      = B->cmap->N/bs;
1625 
1626   for (i=0; i<=b->size; i++) {
1627     b->rangebs[i] = B->rmap->range[i]/bs;
1628   }
1629   b->rstartbs = B->rmap->rstart/bs;
1630   b->rendbs   = B->rmap->rend/bs;
1631 
1632   b->cstartbs = B->cmap->rstart/bs;
1633   b->cendbs   = B->cmap->rend/bs;
1634 
1635   if (!B->preallocated) {
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1638     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1640     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1641     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1642     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1643     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1644     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1645   }
1646 
1647   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1648   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1649 
1650   B->preallocated = PETSC_TRUE;
1651   PetscFunctionReturn(0);
1652 }
1653 
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1656 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1657 {
1658   PetscInt       m,rstart,cstart,cend;
1659   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1660   const PetscInt *JJ    =0;
1661   PetscScalar    *values=0;
1662   PetscErrorCode ierr;
1663 
1664   PetscFunctionBegin;
1665   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1666   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1667   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1668   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1669   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1670   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1671   m      = B->rmap->n/bs;
1672   rstart = B->rmap->rstart/bs;
1673   cstart = B->cmap->rstart/bs;
1674   cend   = B->cmap->rend/bs;
1675 
1676   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1677   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1678   for (i=0; i<m; i++) {
1679     nz = ii[i+1] - ii[i];
1680     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1681     nz_max = PetscMax(nz_max,nz);
1682     JJ     = jj + ii[i];
1683     for (j=0; j<nz; j++) {
1684       if (*JJ >= cstart) break;
1685       JJ++;
1686     }
1687     d = 0;
1688     for (; j<nz; j++) {
1689       if (*JJ++ >= cend) break;
1690       d++;
1691     }
1692     d_nnz[i] = d;
1693     o_nnz[i] = nz - d;
1694   }
1695   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1696   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1697 
1698   values = (PetscScalar*)V;
1699   if (!values) {
1700     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1701     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1702   }
1703   for (i=0; i<m; i++) {
1704     PetscInt          row    = i + rstart;
1705     PetscInt          ncols  = ii[i+1] - ii[i];
1706     const PetscInt    *icols = jj + ii[i];
1707     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1708     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1709   }
1710 
1711   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1712   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1713   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1714   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 #if defined(PETSC_HAVE_MUMPS)
1719 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1720 #endif
1721 #if defined(PETSC_HAVE_PASTIX)
1722 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1723 #endif
1724 
1725 /*MC
1726    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1727    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1728    the matrix is stored.
1729 
1730   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1731   can call MatSetOption(Mat, MAT_HERMITIAN);
1732 
1733    Options Database Keys:
1734 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1735 
1736   Level: beginner
1737 
1738 .seealso: MatCreateMPISBAIJ
1739 M*/
1740 
1741 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatCreate_MPISBAIJ"
1745 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1746 {
1747   Mat_MPISBAIJ   *b;
1748   PetscErrorCode ierr;
1749   PetscBool      flg;
1750 
1751   PetscFunctionBegin;
1752   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1753   B->data = (void*)b;
1754   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1755 
1756   B->ops->destroy = MatDestroy_MPISBAIJ;
1757   B->ops->view    = MatView_MPISBAIJ;
1758   B->assembled    = PETSC_FALSE;
1759   B->insertmode   = NOT_SET_VALUES;
1760 
1761   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1762   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1763 
1764   /* build local table of row and column ownerships */
1765   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1766 
1767   /* build cache for off array entries formed */
1768   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1769 
1770   b->donotstash  = PETSC_FALSE;
1771   b->colmap      = NULL;
1772   b->garray      = NULL;
1773   b->roworiented = PETSC_TRUE;
1774 
1775   /* stuff used in block assembly */
1776   b->barray = 0;
1777 
1778   /* stuff used for matrix vector multiply */
1779   b->lvec    = 0;
1780   b->Mvctx   = 0;
1781   b->slvec0  = 0;
1782   b->slvec0b = 0;
1783   b->slvec1  = 0;
1784   b->slvec1a = 0;
1785   b->slvec1b = 0;
1786   b->sMvctx  = 0;
1787 
1788   /* stuff for MatGetRow() */
1789   b->rowindices   = 0;
1790   b->rowvalues    = 0;
1791   b->getrowactive = PETSC_FALSE;
1792 
1793   /* hash table stuff */
1794   b->ht           = 0;
1795   b->hd           = 0;
1796   b->ht_size      = 0;
1797   b->ht_flag      = PETSC_FALSE;
1798   b->ht_fact      = 0;
1799   b->ht_total_ct  = 0;
1800   b->ht_insert_ct = 0;
1801 
1802   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1803   b->ijonly = PETSC_FALSE;
1804 
1805   b->in_loc = 0;
1806   b->v_loc  = 0;
1807   b->n_loc  = 0;
1808   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1809   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1810   if (flg) {
1811     PetscReal fact = 1.39;
1812     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1813     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1814     if (fact <= 1.0) fact = 1.39;
1815     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1816     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1817   }
1818   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1819 
1820 #if defined(PETSC_HAVE_PASTIX)
1821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1822 #endif
1823 #if defined(PETSC_HAVE_MUMPS)
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1825 #endif
1826   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1828   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1831   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1832 
1833   B->symmetric                  = PETSC_TRUE;
1834   B->structurally_symmetric     = PETSC_TRUE;
1835   B->symmetric_set              = PETSC_TRUE;
1836   B->structurally_symmetric_set = PETSC_TRUE;
1837 
1838   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1839   PetscFunctionReturn(0);
1840 }
1841 
1842 /*MC
1843    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1844 
1845    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1846    and MATMPISBAIJ otherwise.
1847 
1848    Options Database Keys:
1849 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1850 
1851   Level: beginner
1852 
1853 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1854 M*/
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1858 /*@C
1859    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1860    the user should preallocate the matrix storage by setting the parameters
1861    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1862    performance can be increased by more than a factor of 50.
1863 
1864    Collective on Mat
1865 
1866    Input Parameters:
1867 +  B - the matrix
1868 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1869           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1870 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1871            submatrix  (same for all local rows)
1872 .  d_nnz - array containing the number of block nonzeros in the various block rows
1873            in the upper triangular and diagonal part of the in diagonal portion of the local
1874            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1875            for the diagonal entry and set a value even if it is zero.
1876 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1877            submatrix (same for all local rows).
1878 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1879            off-diagonal portion of the local submatrix that is right of the diagonal
1880            (possibly different for each block row) or NULL.
1881 
1882 
1883    Options Database Keys:
1884 .   -mat_no_unroll - uses code that does not unroll the loops in the
1885                      block calculations (much slower)
1886 .   -mat_block_size - size of the blocks to use
1887 
1888    Notes:
1889 
1890    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1891    than it must be used on all processors that share the object for that argument.
1892 
1893    If the *_nnz parameter is given then the *_nz parameter is ignored
1894 
1895    Storage Information:
1896    For a square global matrix we define each processor's diagonal portion
1897    to be its local rows and the corresponding columns (a square submatrix);
1898    each processor's off-diagonal portion encompasses the remainder of the
1899    local matrix (a rectangular submatrix).
1900 
1901    The user can specify preallocated storage for the diagonal part of
1902    the local submatrix with either d_nz or d_nnz (not both).  Set
1903    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1904    memory allocation.  Likewise, specify preallocated storage for the
1905    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1906 
1907    You can call MatGetInfo() to get information on how effective the preallocation was;
1908    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1909    You can also run with the option -info and look for messages with the string
1910    malloc in them to see if additional memory allocation was needed.
1911 
1912    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1913    the figure below we depict these three local rows and all columns (0-11).
1914 
1915 .vb
1916            0 1 2 3 4 5 6 7 8 9 10 11
1917           --------------------------
1918    row 3  |. . . d d d o o o o  o  o
1919    row 4  |. . . d d d o o o o  o  o
1920    row 5  |. . . d d d o o o o  o  o
1921           --------------------------
1922 .ve
1923 
1924    Thus, any entries in the d locations are stored in the d (diagonal)
1925    submatrix, and any entries in the o locations are stored in the
1926    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1927    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1928 
1929    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1930    plus the diagonal part of the d matrix,
1931    and o_nz should indicate the number of block nonzeros per row in the o matrix
1932 
1933    In general, for PDE problems in which most nonzeros are near the diagonal,
1934    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1935    or you will get TERRIBLE performance; see the users' manual chapter on
1936    matrices.
1937 
1938    Level: intermediate
1939 
1940 .keywords: matrix, block, aij, compressed row, sparse, parallel
1941 
1942 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1943 @*/
1944 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1945 {
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1950   PetscValidType(B,1);
1951   PetscValidLogicalCollectiveInt(B,bs,2);
1952   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatCreateSBAIJ"
1958 /*@C
1959    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1960    (block compressed row).  For good matrix assembly performance
1961    the user should preallocate the matrix storage by setting the parameters
1962    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1963    performance can be increased by more than a factor of 50.
1964 
1965    Collective on MPI_Comm
1966 
1967    Input Parameters:
1968 +  comm - MPI communicator
1969 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1970           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1971 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1972            This value should be the same as the local size used in creating the
1973            y vector for the matrix-vector product y = Ax.
1974 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1975            This value should be the same as the local size used in creating the
1976            x vector for the matrix-vector product y = Ax.
1977 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1978 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1979 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1980            submatrix  (same for all local rows)
1981 .  d_nnz - array containing the number of block nonzeros in the various block rows
1982            in the upper triangular portion of the in diagonal portion of the local
1983            (possibly different for each block block row) or NULL.
1984            If you plan to factor the matrix you must leave room for the diagonal entry and
1985            set its value even if it is zero.
1986 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1987            submatrix (same for all local rows).
1988 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1989            off-diagonal portion of the local submatrix (possibly different for
1990            each block row) or NULL.
1991 
1992    Output Parameter:
1993 .  A - the matrix
1994 
1995    Options Database Keys:
1996 .   -mat_no_unroll - uses code that does not unroll the loops in the
1997                      block calculations (much slower)
1998 .   -mat_block_size - size of the blocks to use
1999 .   -mat_mpi - use the parallel matrix data structures even on one processor
2000                (defaults to using SeqBAIJ format on one processor)
2001 
2002    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2003    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2004    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2005 
2006    Notes:
2007    The number of rows and columns must be divisible by blocksize.
2008    This matrix type does not support complex Hermitian operation.
2009 
2010    The user MUST specify either the local or global matrix dimensions
2011    (possibly both).
2012 
2013    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2014    than it must be used on all processors that share the object for that argument.
2015 
2016    If the *_nnz parameter is given then the *_nz parameter is ignored
2017 
2018    Storage Information:
2019    For a square global matrix we define each processor's diagonal portion
2020    to be its local rows and the corresponding columns (a square submatrix);
2021    each processor's off-diagonal portion encompasses the remainder of the
2022    local matrix (a rectangular submatrix).
2023 
2024    The user can specify preallocated storage for the diagonal part of
2025    the local submatrix with either d_nz or d_nnz (not both).  Set
2026    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2027    memory allocation.  Likewise, specify preallocated storage for the
2028    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2029 
2030    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2031    the figure below we depict these three local rows and all columns (0-11).
2032 
2033 .vb
2034            0 1 2 3 4 5 6 7 8 9 10 11
2035           --------------------------
2036    row 3  |. . . d d d o o o o  o  o
2037    row 4  |. . . d d d o o o o  o  o
2038    row 5  |. . . d d d o o o o  o  o
2039           --------------------------
2040 .ve
2041 
2042    Thus, any entries in the d locations are stored in the d (diagonal)
2043    submatrix, and any entries in the o locations are stored in the
2044    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2045    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2046 
2047    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2048    plus the diagonal part of the d matrix,
2049    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2050    In general, for PDE problems in which most nonzeros are near the diagonal,
2051    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2052    or you will get TERRIBLE performance; see the users' manual chapter on
2053    matrices.
2054 
2055    Level: intermediate
2056 
2057 .keywords: matrix, block, aij, compressed row, sparse, parallel
2058 
2059 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2060 @*/
2061 
2062 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2063 {
2064   PetscErrorCode ierr;
2065   PetscMPIInt    size;
2066 
2067   PetscFunctionBegin;
2068   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2069   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2070   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2071   if (size > 1) {
2072     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2073     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2074   } else {
2075     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2076     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2077   }
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2084 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2085 {
2086   Mat            mat;
2087   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2088   PetscErrorCode ierr;
2089   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2090   PetscScalar    *array;
2091 
2092   PetscFunctionBegin;
2093   *newmat = 0;
2094 
2095   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2096   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2097   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2098   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2099   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2100   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2101 
2102   mat->factortype   = matin->factortype;
2103   mat->preallocated = PETSC_TRUE;
2104   mat->assembled    = PETSC_TRUE;
2105   mat->insertmode   = NOT_SET_VALUES;
2106 
2107   a      = (Mat_MPISBAIJ*)mat->data;
2108   a->bs2 = oldmat->bs2;
2109   a->mbs = oldmat->mbs;
2110   a->nbs = oldmat->nbs;
2111   a->Mbs = oldmat->Mbs;
2112   a->Nbs = oldmat->Nbs;
2113 
2114 
2115   a->size         = oldmat->size;
2116   a->rank         = oldmat->rank;
2117   a->donotstash   = oldmat->donotstash;
2118   a->roworiented  = oldmat->roworiented;
2119   a->rowindices   = 0;
2120   a->rowvalues    = 0;
2121   a->getrowactive = PETSC_FALSE;
2122   a->barray       = 0;
2123   a->rstartbs     = oldmat->rstartbs;
2124   a->rendbs       = oldmat->rendbs;
2125   a->cstartbs     = oldmat->cstartbs;
2126   a->cendbs       = oldmat->cendbs;
2127 
2128   /* hash table stuff */
2129   a->ht           = 0;
2130   a->hd           = 0;
2131   a->ht_size      = 0;
2132   a->ht_flag      = oldmat->ht_flag;
2133   a->ht_fact      = oldmat->ht_fact;
2134   a->ht_total_ct  = 0;
2135   a->ht_insert_ct = 0;
2136 
2137   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2138   if (oldmat->colmap) {
2139 #if defined(PETSC_USE_CTABLE)
2140     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2141 #else
2142     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2143     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2144     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2145 #endif
2146   } else a->colmap = 0;
2147 
2148   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2149     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2150     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2151     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2152   } else a->garray = 0;
2153 
2154   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2155   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2157   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2159 
2160   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2161   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2162   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2163   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2164 
2165   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2166   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2167   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2168   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2169   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2170   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2171   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2172   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2173   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2178 
2179   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2180   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2181   a->sMvctx = oldmat->sMvctx;
2182   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2183 
2184   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2185   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2186   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2187   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2188   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2189   *newmat = mat;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatLoad_MPISBAIJ"
2195 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2196 {
2197   PetscErrorCode ierr;
2198   PetscInt       i,nz,j,rstart,rend;
2199   PetscScalar    *vals,*buf;
2200   MPI_Comm       comm;
2201   MPI_Status     status;
2202   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2203   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2204   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2205   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2206   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2207   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2208   int            fd;
2209 
2210   PetscFunctionBegin;
2211   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2212   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2213   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2215 
2216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2217   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2218   if (!rank) {
2219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2220     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2221     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2222     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2223   }
2224 
2225   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2226 
2227   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2228   M    = header[1];
2229   N    = header[2];
2230 
2231   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2232   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2233   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2234 
2235   /* If global sizes are set, check if they are consistent with that given in the file */
2236   if (sizesset) {
2237     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2238   }
2239   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2240   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2241 
2242   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2243 
2244   /*
2245      This code adds extra rows to make sure the number of rows is
2246      divisible by the blocksize
2247   */
2248   Mbs        = M/bs;
2249   extra_rows = bs - M + bs*(Mbs);
2250   if (extra_rows == bs) extra_rows = 0;
2251   else                  Mbs++;
2252   if (extra_rows &&!rank) {
2253     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2254   }
2255 
2256   /* determine ownership of all rows */
2257   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2258     mbs = Mbs/size + ((Mbs % size) > rank);
2259     m   = mbs*bs;
2260   } else { /* User Set */
2261     m   = newmat->rmap->n;
2262     mbs = m/bs;
2263   }
2264   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2265   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2266   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2267   rowners[0] = 0;
2268   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2269   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2270   rstart = rowners[rank];
2271   rend   = rowners[rank+1];
2272 
2273   /* distribute row lengths to all processors */
2274   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2275   if (!rank) {
2276     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2277     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2278     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2279     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2280     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2281     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2282     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2283   } else {
2284     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2285   }
2286 
2287   if (!rank) {   /* procs[0] */
2288     /* calculate the number of nonzeros on each processor */
2289     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2290     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2291     for (i=0; i<size; i++) {
2292       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2293         procsnz[i] += rowlengths[j];
2294       }
2295     }
2296     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2297 
2298     /* determine max buffer needed and allocate it */
2299     maxnz = 0;
2300     for (i=0; i<size; i++) {
2301       maxnz = PetscMax(maxnz,procsnz[i]);
2302     }
2303     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2304 
2305     /* read in my part of the matrix column indices  */
2306     nz     = procsnz[0];
2307     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2308     mycols = ibuf;
2309     if (size == 1) nz -= extra_rows;
2310     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2311     if (size == 1) {
2312       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2313     }
2314 
2315     /* read in every ones (except the last) and ship off */
2316     for (i=1; i<size-1; i++) {
2317       nz   = procsnz[i];
2318       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2319       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2320     }
2321     /* read in the stuff for the last proc */
2322     if (size != 1) {
2323       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2324       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2325       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2326       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2327     }
2328     ierr = PetscFree(cols);CHKERRQ(ierr);
2329   } else {  /* procs[i], i>0 */
2330     /* determine buffer space needed for message */
2331     nz = 0;
2332     for (i=0; i<m; i++) nz += locrowlens[i];
2333     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2334     mycols = ibuf;
2335     /* receive message of column indices*/
2336     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2337     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2338     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2339   }
2340 
2341   /* loop over local rows, determining number of off diagonal entries */
2342   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2343   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2344   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2345   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2346   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2347   rowcount = 0;
2348   nzcount  = 0;
2349   for (i=0; i<mbs; i++) {
2350     dcount  = 0;
2351     odcount = 0;
2352     for (j=0; j<bs; j++) {
2353       kmax = locrowlens[rowcount];
2354       for (k=0; k<kmax; k++) {
2355         tmp = mycols[nzcount++]/bs; /* block col. index */
2356         if (!mask[tmp]) {
2357           mask[tmp] = 1;
2358           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2359           else masked1[dcount++] = tmp; /* entry in diag portion */
2360         }
2361       }
2362       rowcount++;
2363     }
2364 
2365     dlens[i]  = dcount;  /* d_nzz[i] */
2366     odlens[i] = odcount; /* o_nzz[i] */
2367 
2368     /* zero out the mask elements we set */
2369     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2370     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2371   }
2372   if (!sizesset) {
2373     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2374   }
2375   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2376   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2377 
2378   if (!rank) {
2379     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2380     /* read in my part of the matrix numerical values  */
2381     nz     = procsnz[0];
2382     vals   = buf;
2383     mycols = ibuf;
2384     if (size == 1) nz -= extra_rows;
2385     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2386     if (size == 1) {
2387       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2388     }
2389 
2390     /* insert into matrix */
2391     jj = rstart*bs;
2392     for (i=0; i<m; i++) {
2393       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2394       mycols += locrowlens[i];
2395       vals   += locrowlens[i];
2396       jj++;
2397     }
2398 
2399     /* read in other processors (except the last one) and ship out */
2400     for (i=1; i<size-1; i++) {
2401       nz   = procsnz[i];
2402       vals = buf;
2403       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2404       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2405     }
2406     /* the last proc */
2407     if (size != 1) {
2408       nz   = procsnz[i] - extra_rows;
2409       vals = buf;
2410       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2411       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2412       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2413     }
2414     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2415 
2416   } else {
2417     /* receive numeric values */
2418     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2419 
2420     /* receive message of values*/
2421     vals   = buf;
2422     mycols = ibuf;
2423     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2424     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2425     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2426 
2427     /* insert into matrix */
2428     jj = rstart*bs;
2429     for (i=0; i<m; i++) {
2430       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2431       mycols += locrowlens[i];
2432       vals   += locrowlens[i];
2433       jj++;
2434     }
2435   }
2436 
2437   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2438   ierr = PetscFree(buf);CHKERRQ(ierr);
2439   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2440   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2441   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2442   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2443   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 #undef __FUNCT__
2449 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2450 /*XXXXX@
2451    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2452 
2453    Input Parameters:
2454 .  mat  - the matrix
2455 .  fact - factor
2456 
2457    Not Collective on Mat, each process can have a different hash factor
2458 
2459    Level: advanced
2460 
2461   Notes:
2462    This can also be set by the command line option: -mat_use_hash_table fact
2463 
2464 .keywords: matrix, hashtable, factor, HT
2465 
2466 .seealso: MatSetOption()
2467 @XXXXX*/
2468 
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2472 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2473 {
2474   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2475   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2476   PetscReal      atmp;
2477   PetscReal      *work,*svalues,*rvalues;
2478   PetscErrorCode ierr;
2479   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2480   PetscMPIInt    rank,size;
2481   PetscInt       *rowners_bs,dest,count,source;
2482   PetscScalar    *va;
2483   MatScalar      *ba;
2484   MPI_Status     stat;
2485 
2486   PetscFunctionBegin;
2487   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2488   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2489   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2490 
2491   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2492   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2493 
2494   bs  = A->rmap->bs;
2495   mbs = a->mbs;
2496   Mbs = a->Mbs;
2497   ba  = b->a;
2498   bi  = b->i;
2499   bj  = b->j;
2500 
2501   /* find ownerships */
2502   rowners_bs = A->rmap->range;
2503 
2504   /* each proc creates an array to be distributed */
2505   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2506   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2507 
2508   /* row_max for B */
2509   if (rank != size-1) {
2510     for (i=0; i<mbs; i++) {
2511       ncols = bi[1] - bi[0]; bi++;
2512       brow  = bs*i;
2513       for (j=0; j<ncols; j++) {
2514         bcol = bs*(*bj);
2515         for (kcol=0; kcol<bs; kcol++) {
2516           col  = bcol + kcol;                /* local col index */
2517           col += rowners_bs[rank+1];      /* global col index */
2518           for (krow=0; krow<bs; krow++) {
2519             atmp = PetscAbsScalar(*ba); ba++;
2520             row  = brow + krow;   /* local row index */
2521             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2522             if (work[col] < atmp) work[col] = atmp;
2523           }
2524         }
2525         bj++;
2526       }
2527     }
2528 
2529     /* send values to its owners */
2530     for (dest=rank+1; dest<size; dest++) {
2531       svalues = work + rowners_bs[dest];
2532       count   = rowners_bs[dest+1]-rowners_bs[dest];
2533       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2534     }
2535   }
2536 
2537   /* receive values */
2538   if (rank) {
2539     rvalues = work;
2540     count   = rowners_bs[rank+1]-rowners_bs[rank];
2541     for (source=0; source<rank; source++) {
2542       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2543       /* process values */
2544       for (i=0; i<count; i++) {
2545         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2546       }
2547     }
2548   }
2549 
2550   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2551   ierr = PetscFree(work);CHKERRQ(ierr);
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 #undef __FUNCT__
2556 #define __FUNCT__ "MatSOR_MPISBAIJ"
2557 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2558 {
2559   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2560   PetscErrorCode    ierr;
2561   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2562   PetscScalar       *x,*ptr,*from;
2563   Vec               bb1;
2564   const PetscScalar *b;
2565 
2566   PetscFunctionBegin;
2567   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2568   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2569 
2570   if (flag == SOR_APPLY_UPPER) {
2571     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2572     PetscFunctionReturn(0);
2573   }
2574 
2575   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2576     if (flag & SOR_ZERO_INITIAL_GUESS) {
2577       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2578       its--;
2579     }
2580 
2581     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2582     while (its--) {
2583 
2584       /* lower triangular part: slvec0b = - B^T*xx */
2585       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2586 
2587       /* copy xx into slvec0a */
2588       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2589       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2590       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2591       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2592 
2593       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2594 
2595       /* copy bb into slvec1a */
2596       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2597       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2598       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2599       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2600 
2601       /* set slvec1b = 0 */
2602       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2603 
2604       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2606       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2607       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2608 
2609       /* upper triangular part: bb1 = bb1 - B*x */
2610       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2611 
2612       /* local diagonal sweep */
2613       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2614     }
2615     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2616   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2617     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2618   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2619     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2620   } else if (flag & SOR_EISENSTAT) {
2621     Vec               xx1;
2622     PetscBool         hasop;
2623     const PetscScalar *diag;
2624     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2625     PetscInt          i,n;
2626 
2627     if (!mat->xx1) {
2628       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2629       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2630     }
2631     xx1 = mat->xx1;
2632     bb1 = mat->bb1;
2633 
2634     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2635 
2636     if (!mat->diag) {
2637       /* this is wrong for same matrix with new nonzero values */
2638       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2639       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2640     }
2641     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2642 
2643     if (hasop) {
2644       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2645       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2646     } else {
2647       /*
2648           These two lines are replaced by code that may be a bit faster for a good compiler
2649       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2650       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2651       */
2652       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2653       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2654       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2655       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2656       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2657       if (omega == 1.0) {
2658         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2659         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2660       } else {
2661         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2662         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2663       }
2664       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2665       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2666       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2667       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2668     }
2669 
2670     /* multiply off-diagonal portion of matrix */
2671     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2672     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2673     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2674     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2675     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2676     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2677     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2678     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2679     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2680     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2681 
2682     /* local sweep */
2683     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2684     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2685   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2686   PetscFunctionReturn(0);
2687 }
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2691 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2692 {
2693   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2694   PetscErrorCode ierr;
2695   Vec            lvec1,bb1;
2696 
2697   PetscFunctionBegin;
2698   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2699   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2700 
2701   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2702     if (flag & SOR_ZERO_INITIAL_GUESS) {
2703       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2704       its--;
2705     }
2706 
2707     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2708     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2709     while (its--) {
2710       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2711 
2712       /* lower diagonal part: bb1 = bb - B^T*xx */
2713       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2714       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2715 
2716       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2718       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2719 
2720       /* upper diagonal part: bb1 = bb1 - B*x */
2721       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2722       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2723 
2724       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2725 
2726       /* diagonal sweep */
2727       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2728     }
2729     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2730     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2731   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 #undef __FUNCT__
2736 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2737 /*@
2738      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2739          CSR format the local rows.
2740 
2741    Collective on MPI_Comm
2742 
2743    Input Parameters:
2744 +  comm - MPI communicator
2745 .  bs - the block size, only a block size of 1 is supported
2746 .  m - number of local rows (Cannot be PETSC_DECIDE)
2747 .  n - This value should be the same as the local size used in creating the
2748        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2749        calculated if N is given) For square matrices n is almost always m.
2750 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2751 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2752 .   i - row indices
2753 .   j - column indices
2754 -   a - matrix values
2755 
2756    Output Parameter:
2757 .   mat - the matrix
2758 
2759    Level: intermediate
2760 
2761    Notes:
2762        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2763      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2764      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2765 
2766        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2767 
2768 .keywords: matrix, aij, compressed row, sparse, parallel
2769 
2770 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2771           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2772 @*/
2773 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2774 {
2775   PetscErrorCode ierr;
2776 
2777 
2778   PetscFunctionBegin;
2779   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2780   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2781   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2782   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2783   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2784   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2791 /*@C
2792    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2793    (the default parallel PETSc format).
2794 
2795    Collective on MPI_Comm
2796 
2797    Input Parameters:
2798 +  B - the matrix
2799 .  bs - the block size
2800 .  i - the indices into j for the start of each local row (starts with zero)
2801 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2802 -  v - optional values in the matrix
2803 
2804    Level: developer
2805 
2806 .keywords: matrix, aij, compressed row, sparse, parallel
2807 
2808 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2809 @*/
2810 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2811 {
2812   PetscErrorCode ierr;
2813 
2814   PetscFunctionBegin;
2815   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 #undef __FUNCT__
2820 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
2821 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2822 {
2823   PetscErrorCode ierr;
2824   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
2825   PetscInt       *indx;
2826   PetscScalar    *values;
2827 
2828   PetscFunctionBegin;
2829   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2830   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2831     Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
2832     PetscInt       *dnz,*onz,sum,bs,cbs,mbs,Nbs;
2833     PetscInt       *bindx,rmax=a->rmax,j;
2834 
2835     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2836     mbs = m/bs; Nbs = N/cbs;
2837     if (n == PETSC_DECIDE) {
2838       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
2839     }
2840     /* Check sum(n) = Nbs */
2841     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2842     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
2843 
2844     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2845     rstart -= mbs;
2846 
2847     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
2848     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
2849     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2850     for (i=0; i<mbs; i++) {
2851       ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
2852       nnz = nnz/bs;
2853       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
2854       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
2855       ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
2856     }
2857     ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2858     ierr = PetscFree(bindx);CHKERRQ(ierr);
2859 
2860     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2861     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2862     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
2863     ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
2864     ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
2865     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2866   }
2867 
2868   /* numeric phase */
2869   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
2870   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
2871 
2872   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2873   for (i=0; i<m; i++) {
2874     ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2875     Ii   = i + rstart;
2876     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2877     ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2878   }
2879   ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
2880   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2882   PetscFunctionReturn(0);
2883 }
2884