/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cc310fdddfd1255ff354b307e63afef1e66c265d) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
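
/* Usage sketch (hypothetical caller, not part of this file): MatStoreValues()
   saves the numerical values of the matrix so that they can be restored with
   MatRetrieveValues() after being overwritten, e.g. inside an iteration that
   reuses the nonzero pattern.  The matrix must first disallow new nonzero
   locations:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ....                                          (overwrite the values of mat)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  (original values are back)
*/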
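/* The two macros below insert one scalar value at local scalar indices
   (row,col) into the diagonal block A (SeqSBAIJ) or the off-diagonal block B
   (SeqBAIJ).  Each locates the block column with a short bisection followed
   by a linear scan; if the block is not yet present, it shifts the later
   entries of the row upward, reallocating through MatSeqXAIJReallocateAIJ()
   when the row is full. */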
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* Only add/insert a(i,j) with i<=j (block indices).
   Any a(i,j) with i>j supplied by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
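
/* Usage sketch (hypothetical caller): only the upper triangle (by block) is
   stored, so only entries with block-row <= block-column need to be supplied;
   with MAT_IGNORE_LOWER_TRIANGULAR set, lower-triangular input is silently
   dropped instead of raising an error.  (A is an assembled MATMPISBAIJ; ierr
   declared as usual.)

     PetscInt    i = 0, j = 2;                 (global indices, i <= j)
     PetscScalar val = 3.0;
     ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/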

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
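
/* Usage sketch (hypothetical caller): insert one bs x bs block at block
   indices (ib,jb), ib <= jb; with the default row-oriented storage v holds
   the block row by row.

     PetscInt    ib = 0, jb = 1;
     PetscScalar v[4] = {1.0, 2.0, 2.0, 5.0};   (bs = 2, so bs*bs = 4 entries)
     ierr = MatSetValuesBlocked(A,1,&ib,1,&jb,v,INSERT_VALUES);CHKERRQ(ierr);
*/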

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of the local norm of A */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of the local norm of B */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
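
/* Note on the Frobenius norm above: each process stores its diagonal block
   A_p and a single copy of its off-diagonal block B_p, which also stands in
   for B_p^T in the symmetric global matrix M.  Hence

     ||M||_F = sqrt( sum_p ||A_p||_F^2  +  2 * sum_p ||B_p||_F^2 ),

   which is exactly *norm = PetscSqrtReal(sum[0] + 2*sum[1]).  For a symmetric
   matrix the 1-norm and the infinity norm coincide, which is why NORM_1 and
   NORM_INFINITY share the maximum-column-sum code path. */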

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are in the same INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags before MatSetValues() and restore the
       original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
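
/* Usage sketch (hypothetical caller): values set for rows owned by another
   process are exchanged during assembly, so every setting phase must be
   closed by the begin/end pair before the matrix is used:

     ierr = MatSetValues(A,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Use MAT_FLUSH_ASSEMBLY instead when switching between INSERT_VALUES and
   ADD_VALUES before the final assembly. */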

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;
    const char   *matname;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to participate in this call since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

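/* The MatMult routines below use the split-vector scheme: slvec0 holds the
   local part of x followed by B^T*x (this process's contribution to rows
   owned elsewhere), and a single scatter with ADD_VALUES simultaneously ships
   those contributions to their owners and gathers the ghost values of x
   needed to apply B.  The result is y = A_loc*x + B*x_ghost + (received
   B^T*x terms), i.e. one communication phase per product instead of two. */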
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

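/* MatGetRow below merges one row of the diagonal block A (upper triangle
   only) with the matching row of the off-diagonal block B, mapping the column
   indices of B back to global numbering through baij->garray so that a single
   row, sorted by increasing global column, is returned. */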
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

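/* Usage sketch for MatSetOption() above (hypothetical caller):

     ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);

   Note that MAT_HERMITIAN may only be set after the matrix has been
   assembled, as enforced above. */
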
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
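
/* A symmetric matrix equals its own transpose, so MatTranspose above simply
   duplicates A for MAT_INITIAL_MATRIX and leaves an in-place transpose
   (*B == A) untouched. */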

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatCopy_MPISBAIJ"
1349 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1350 {
1351   PetscErrorCode ierr;
1352   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1353   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1354 
1355   PetscFunctionBegin;
1356   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1357   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1358     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1359     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1360     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1361   } else {
1362     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1363     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1370 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1371 {
1372   PetscErrorCode ierr;
1373 
1374   PetscFunctionBegin;
1375   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1381 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1382 {
1383   PetscErrorCode ierr;
1384   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1385   PetscBLASInt   bnz,one=1;
1386   Mat_SeqSBAIJ   *xa,*ya;
1387   Mat_SeqBAIJ    *xb,*yb;
1388 
1389   PetscFunctionBegin;
1390   if (str == SAME_NONZERO_PATTERN) {
1391     PetscScalar alpha = a;
1392     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1393     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1394     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1395     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1396     xb   = (Mat_SeqBAIJ*)xx->B->data;
1397     yb   = (Mat_SeqBAIJ*)yy->B->data;
1398     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1399     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1400     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1401   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X form a subset of Y's */
1402     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
1403     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1404     ierr = MatSetOption(X,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
1405   } else {
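    /* Different nonzero patterns: build a fresh MPISBAIJ matrix preallocated for
       the union of X's and Y's patterns, compute the sum into it, and then swap
       it into Y with MatHeaderReplace(). */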
1406     Mat      B;
1407     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1408     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1409     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1410     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1411     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1412     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1413     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1414     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1415     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1416     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1417     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1418     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1419     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1420     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1421     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1422     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1423     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1424     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1425     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1426     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1427   }
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1433 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1434 {
1435   PetscErrorCode ierr;
1436   PetscInt       i;
1437   PetscBool      flg;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1441   for (i=0; i<n; i++) {
1442     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1443     if (!flg) { /* (*B)[i] is non-symmetric, set flag */
1444       ierr = MatSetOption((*B)[i],MAT_SYMMETRIC,PETSC_FALSE);CHKERRQ(ierr);
1445     }
1446   }
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 /* -------------------------------------------------------------------*/
1451 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1452                                        MatGetRow_MPISBAIJ,
1453                                        MatRestoreRow_MPISBAIJ,
1454                                        MatMult_MPISBAIJ,
1455                                /*  4*/ MatMultAdd_MPISBAIJ,
1456                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1457                                        MatMultAdd_MPISBAIJ,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                /* 10*/ 0,
1462                                        0,
1463                                        0,
1464                                        MatSOR_MPISBAIJ,
1465                                        MatTranspose_MPISBAIJ,
1466                                /* 15*/ MatGetInfo_MPISBAIJ,
1467                                        MatEqual_MPISBAIJ,
1468                                        MatGetDiagonal_MPISBAIJ,
1469                                        MatDiagonalScale_MPISBAIJ,
1470                                        MatNorm_MPISBAIJ,
1471                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1472                                        MatAssemblyEnd_MPISBAIJ,
1473                                        MatSetOption_MPISBAIJ,
1474                                        MatZeroEntries_MPISBAIJ,
1475                                /* 24*/ 0,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                        0,
1480                                /* 29*/ MatSetUp_MPISBAIJ,
1481                                        0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                /* 34*/ MatDuplicate_MPISBAIJ,
1486                                        0,
1487                                        0,
1488                                        0,
1489                                        0,
1490                                /* 39*/ MatAXPY_MPISBAIJ,
1491                                        MatGetSubMatrices_MPISBAIJ,
1492                                        MatIncreaseOverlap_MPISBAIJ,
1493                                        MatGetValues_MPISBAIJ,
1494                                        MatCopy_MPISBAIJ,
1495                                /* 44*/ 0,
1496                                        MatScale_MPISBAIJ,
1497                                        0,
1498                                        0,
1499                                        0,
1500                                /* 49*/ 0,
1501                                        0,
1502                                        0,
1503                                        0,
1504                                        0,
1505                                /* 54*/ 0,
1506                                        0,
1507                                        MatSetUnfactored_MPISBAIJ,
1508                                        0,
1509                                        MatSetValuesBlocked_MPISBAIJ,
1510                                /* 59*/ 0,
1511                                        0,
1512                                        0,
1513                                        0,
1514                                        0,
1515                                /* 64*/ 0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                        0,
1520                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                        0,
1525                                /* 74*/ 0,
1526                                        0,
1527                                        0,
1528                                        0,
1529                                        0,
1530                                /* 79*/ 0,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                        MatLoad_MPISBAIJ,
1535                                /* 84*/ 0,
1536                                        0,
1537                                        0,
1538                                        0,
1539                                        0,
1540                                /* 89*/ 0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                        0,
1545                                /* 94*/ 0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                        0,
1550                                /* 99*/ 0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                        0,
1555                                /*104*/ 0,
1556                                        MatRealPart_MPISBAIJ,
1557                                        MatImaginaryPart_MPISBAIJ,
1558                                        MatGetRowUpperTriangular_MPISBAIJ,
1559                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1560                                /*109*/ 0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                        0,
1565                                /*114*/ 0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                        0,
1570                                /*119*/ 0,
1571                                        0,
1572                                        0,
1573                                        0,
1574                                        0,
1575                                /*124*/ 0,
1576                                        0,
1577                                        0,
1578                                        0,
1579                                        0,
1580                                /*129*/ 0,
1581                                        0,
1582                                        0,
1583                                        0,
1584                                        0,
1585                                /*134*/ 0,
1586                                        0,
1587                                        0,
1588                                        0,
1589                                        0,
1590                                /*139*/ 0,
1591                                        0,
1592                                        0,
1593                                        0,
1594                                        0,
1595                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPISBAIJ
1596 };
1597 
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1600 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1601 {
1602   PetscFunctionBegin;
1603   *a = ((Mat_MPISBAIJ*)A->data)->A;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1609 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1610 {
1611   Mat_MPISBAIJ   *b;
1612   PetscErrorCode ierr;
1613   PetscInt       i,mbs,Mbs;
1614 
1615   PetscFunctionBegin;
1616   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1617   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1618   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1619   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1620 
1621   b   = (Mat_MPISBAIJ*)B->data;
1622   mbs = B->rmap->n/bs;
1623   Mbs = B->rmap->N/bs;
1624   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1625 
1626   B->rmap->bs = bs;
1627   b->bs2      = bs*bs;
1628   b->mbs      = mbs;
1629   b->Mbs      = Mbs;
1630   b->nbs      = B->cmap->n/bs;
1631   b->Nbs      = B->cmap->N/bs;
1632 
1633   for (i=0; i<=b->size; i++) {
1634     b->rangebs[i] = B->rmap->range[i]/bs;
1635   }
1636   b->rstartbs = B->rmap->rstart/bs;
1637   b->rendbs   = B->rmap->rend/bs;
1638 
1639   b->cstartbs = B->cmap->rstart/bs;
1640   b->cendbs   = B->cmap->rend/bs;
1641 
1642   if (!B->preallocated) {
1643     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1644     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1645     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1646     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1647     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1649     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1650     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1651     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1652   }
1653 
1654   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1655   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1656 
1657   B->preallocated = PETSC_TRUE;
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1663 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1664 {
1665   PetscInt       m,rstart,cstart,cend;
1666   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1667   const PetscInt *JJ    =0;
1668   PetscScalar    *values=0;
1669   PetscErrorCode ierr;
1670 
1671   PetscFunctionBegin;
1672   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1673   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1674   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1675   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1676   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1677   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1678   m      = B->rmap->n/bs;
1679   rstart = B->rmap->rstart/bs;
1680   cstart = B->cmap->rstart/bs;
1681   cend   = B->cmap->rend/bs;
1682 
1683   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1684   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
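  /* For each local block row, count the block columns inside the local diagonal
     block-column range [cstart,cend) (d_nnz) versus everything else (o_nnz);
     the scan assumes the column indices within each row are sorted. */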
1685   for (i=0; i<m; i++) {
1686     nz = ii[i+1] - ii[i];
1687     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1688     nz_max = PetscMax(nz_max,nz);
1689     JJ     = jj + ii[i];
1690     for (j=0; j<nz; j++) {
1691       if (*JJ >= cstart) break;
1692       JJ++;
1693     }
1694     d = 0;
1695     for (; j<nz; j++) {
1696       if (*JJ++ >= cend) break;
1697       d++;
1698     }
1699     d_nnz[i] = d;
1700     o_nnz[i] = nz - d;
1701   }
1702   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1703   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1704 
1705   values = (PetscScalar*)V;
1706   if (!values) {
1707     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1708     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1709   }
1710   for (i=0; i<m; i++) {
1711     PetscInt          row    = i + rstart;
1712     PetscInt          ncols  = ii[i+1] - ii[i];
1713     const PetscInt    *icols = jj + ii[i];
1714     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1715     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1716   }
1717 
1718   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1719   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 /*MC
1726    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1727    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1728    the matrix is stored.
1729 
1730   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1731   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE).
1732 
1733    Options Database Keys:
1734 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1735 
1736   Level: beginner
1737 
1738 .seealso: MatCreateMPISBAIJ
1739 M*/
1740 
1741 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1742 
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatCreate_MPISBAIJ"
1745 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1746 {
1747   Mat_MPISBAIJ   *b;
1748   PetscErrorCode ierr;
1749   PetscBool      flg = PETSC_FALSE;
1750 
1751   PetscFunctionBegin;
1752   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1753   B->data = (void*)b;
1754   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1755 
1756   B->ops->destroy = MatDestroy_MPISBAIJ;
1757   B->ops->view    = MatView_MPISBAIJ;
1758   B->assembled    = PETSC_FALSE;
1759   B->insertmode   = NOT_SET_VALUES;
1760 
1761   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1762   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1763 
1764   /* build local table of row and column ownerships */
1765   ierr = PetscMalloc1(b->size+2,&b->rangebs);CHKERRQ(ierr);
1766 
1767   /* build cache for off array entries formed */
1768   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1769 
1770   b->donotstash  = PETSC_FALSE;
1771   b->colmap      = NULL;
1772   b->garray      = NULL;
1773   b->roworiented = PETSC_TRUE;
1774 
1775   /* stuff used in block assembly */
1776   b->barray = 0;
1777 
1778   /* stuff used for matrix vector multiply */
1779   b->lvec    = 0;
1780   b->Mvctx   = 0;
1781   b->slvec0  = 0;
1782   b->slvec0b = 0;
1783   b->slvec1  = 0;
1784   b->slvec1a = 0;
1785   b->slvec1b = 0;
1786   b->sMvctx  = 0;
1787 
1788   /* stuff for MatGetRow() */
1789   b->rowindices   = 0;
1790   b->rowvalues    = 0;
1791   b->getrowactive = PETSC_FALSE;
1792 
1793   /* hash table stuff */
1794   b->ht           = 0;
1795   b->hd           = 0;
1796   b->ht_size      = 0;
1797   b->ht_flag      = PETSC_FALSE;
1798   b->ht_fact      = 0;
1799   b->ht_total_ct  = 0;
1800   b->ht_insert_ct = 0;
1801 
1802   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1803   b->ijonly = PETSC_FALSE;
1804 
1805   b->in_loc = 0;
1806   b->v_loc  = 0;
1807   b->n_loc  = 0;
1808 
1809   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1815 
1816   B->symmetric                  = PETSC_TRUE;
1817   B->structurally_symmetric     = PETSC_TRUE;
1818   B->symmetric_set              = PETSC_TRUE;
1819   B->structurally_symmetric_set = PETSC_TRUE;
1820 
1821   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1822   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1823   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
1824   if (flg) {
1825     PetscReal fact = 1.39;
1826     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1827     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1828     if (fact <= 1.0) fact = 1.39;
1829     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1830     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1831   }
1832   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 /*MC
1837    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1838 
1839    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1840    and MATMPISBAIJ otherwise.
1841 
1842    Options Database Keys:
1843 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1844 
1845   Level: beginner
1846 
1847 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1848 M*/
1849 
1850 #undef __FUNCT__
1851 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1852 /*@C
1853    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1854    the user should preallocate the matrix storage by setting the parameters
1855    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1856    performance can be increased by more than a factor of 50.
1857 
1858    Collective on Mat
1859 
1860    Input Parameters:
1861 +  B - the matrix
1862 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1863           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1864 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1865            submatrix  (same for all local rows)
1866 .  d_nnz - array containing the number of block nonzeros in the various block rows
1867            in the upper triangular and diagonal part of the diagonal portion of the local
1868            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1869            for the diagonal entry and set a value even if it is zero.
1870 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1871            submatrix (same for all local rows).
1872 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1873            off-diagonal portion of the local submatrix that is right of the diagonal
1874            (possibly different for each block row) or NULL.
1875 
1876 
1877    Options Database Keys:
1878 .   -mat_no_unroll - uses code that does not unroll the loops in the
1879                      block calculations (much slower)
1880 .   -mat_block_size - size of the blocks to use
1881 
1882    Notes:
1883 
1884    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1885    then it must be used on all processors that share the object for that argument.
1886 
1887    If the *_nnz parameter is given then the *_nz parameter is ignored
1888 
1889    Storage Information:
1890    For a square global matrix we define each processor's diagonal portion
1891    to be its local rows and the corresponding columns (a square submatrix);
1892    each processor's off-diagonal portion encompasses the remainder of the
1893    local matrix (a rectangular submatrix).
1894 
1895    The user can specify preallocated storage for the diagonal part of
1896    the local submatrix with either d_nz or d_nnz (not both).  Set
1897    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1898    memory allocation.  Likewise, specify preallocated storage for the
1899    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1900 
1901    You can call MatGetInfo() to get information on how effective the preallocation was;
1902    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1903    You can also run with the option -info and look for messages with the string
1904    malloc in them to see if additional memory allocation was needed.
1905 
1906    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1907    the figure below we depict these three local rows and all columns (0-11).
1908 
1909 .vb
1910            0 1 2 3 4 5 6 7 8 9 10 11
1911           --------------------------
1912    row 3  |. . . d d d o o o o  o  o
1913    row 4  |. . . d d d o o o o  o  o
1914    row 5  |. . . d d d o o o o  o  o
1915           --------------------------
1916 .ve
1917 
1918    Thus, any entries in the d locations are stored in the d (diagonal)
1919    submatrix, and any entries in the o locations are stored in the
1920    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1921    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1922 
1923    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1924    plus the diagonal part of the d matrix,
1925    and o_nz should indicate the number of block nonzeros per row in the o matrix.
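
   For the rows depicted above (with bs equal to 1), the upper triangular plus
   diagonal part of the d block has 3 entries in row 3, 2 in row 4 and 1 in
   row 5, so d_nnz = [3,2,1] and o_nnz = [6,6,6] would preallocate exactly
   that pattern.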
1926 
1927    In general, for PDE problems in which most nonzeros are near the diagonal,
1928    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1929    or you will get TERRIBLE performance; see the users' manual chapter on
1930    matrices.
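
   A typical call sequence looks as follows (a sketch; comm, the sizes, and the
   nnz arguments are placeholders to be chosen for the application):
.vb
   Mat B;
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(B,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve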
1931 
1932    Level: intermediate
1933 
1934 .keywords: matrix, block, aij, compressed row, sparse, parallel
1935 
1936 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1937 @*/
1938 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1939 {
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1944   PetscValidType(B,1);
1945   PetscValidLogicalCollectiveInt(B,bs,2);
1946   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatCreateSBAIJ"
1952 /*@C
1953    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1954    (block compressed row).  For good matrix assembly performance
1955    the user should preallocate the matrix storage by setting the parameters
1956    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1957    performance can be increased by more than a factor of 50.
1958 
1959    Collective on MPI_Comm
1960 
1961    Input Parameters:
1962 +  comm - MPI communicator
1963 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
1964           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
1965 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1966            This value should be the same as the local size used in creating the
1967            y vector for the matrix-vector product y = Ax.
1968 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1969            This value should be the same as the local size used in creating the
1970            x vector for the matrix-vector product y = Ax.
1971 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1972 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1973 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1974            submatrix  (same for all local rows)
1975 .  d_nnz - array containing the number of block nonzeros in the various block rows
1976            in the upper triangular portion of the diagonal portion of the local
1977            submatrix (possibly different for each block row) or NULL.
1978            If you plan to factor the matrix you must leave room for the diagonal entry and
1979            set its value even if it is zero.
1980 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1981            submatrix (same for all local rows).
1982 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1983            off-diagonal portion of the local submatrix (possibly different for
1984            each block row) or NULL.
1985 
1986    Output Parameter:
1987 .  A - the matrix
1988 
1989    Options Database Keys:
1990 .   -mat_no_unroll - uses code that does not unroll the loops in the
1991                      block calculations (much slower)
1992 .   -mat_block_size - size of the blocks to use
1993 .   -mat_mpi - use the parallel matrix data structures even on one processor
1994                (defaults to using SeqSBAIJ format on one processor)
1995 
1996    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1997    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1998    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1999 
2000    Notes:
2001    The number of rows and columns must be divisible by blocksize.
2002    This matrix type does not support complex Hermitian operations.
2003 
2004    The user MUST specify either the local or global matrix dimensions
2005    (possibly both).
2006 
2007    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2008    then it must be used on all processors that share the object for that argument.
2009 
2010    If the *_nnz parameter is given then the *_nz parameter is ignored
2011 
2012    Storage Information:
2013    For a square global matrix we define each processor's diagonal portion
2014    to be its local rows and the corresponding columns (a square submatrix);
2015    each processor's off-diagonal portion encompasses the remainder of the
2016    local matrix (a rectangular submatrix).
2017 
2018    The user can specify preallocated storage for the diagonal part of
2019    the local submatrix with either d_nz or d_nnz (not both).  Set
2020    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2021    memory allocation.  Likewise, specify preallocated storage for the
2022    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2023 
2024    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2025    the figure below we depict these three local rows and all columns (0-11).
2026 
2027 .vb
2028            0 1 2 3 4 5 6 7 8 9 10 11
2029           --------------------------
2030    row 3  |. . . d d d o o o o  o  o
2031    row 4  |. . . d d d o o o o  o  o
2032    row 5  |. . . d d d o o o o  o  o
2033           --------------------------
2034 .ve
2035 
2036    Thus, any entries in the d locations are stored in the d (diagonal)
2037    submatrix, and any entries in the o locations are stored in the
2038    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2039    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2040 
2041    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2042    plus the diagonal part of the d matrix,
2043    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2044    In general, for PDE problems in which most nonzeros are near the diagonal,
2045    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2046    or you will get TERRIBLE performance; see the users' manual chapter on
2047    matrices.
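
   A minimal call sequence (a sketch; the sizes and nnz arguments are
   placeholders to be chosen for the application):
.vb
   Mat A;
   ierr = MatCreateSBAIJ(comm,bs,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);CHKERRQ(ierr);
   /* ... set values in the upper triangle, then assemble ... */
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve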
2048 
2049    Level: intermediate
2050 
2051 .keywords: matrix, block, aij, compressed row, sparse, parallel
2052 
2053 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2054 @*/
2055 
2056 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2057 {
2058   PetscErrorCode ierr;
2059   PetscMPIInt    size;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2063   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2064   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2065   if (size > 1) {
2066     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2067     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2068   } else {
2069     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2070     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2078 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2079 {
2080   Mat            mat;
2081   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2082   PetscErrorCode ierr;
2083   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2084   PetscScalar    *array;
2085 
2086   PetscFunctionBegin;
2087   *newmat = 0;
2088 
2089   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2090   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2091   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2092   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2093   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2094   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2095 
2096   mat->factortype   = matin->factortype;
2097   mat->preallocated = PETSC_TRUE;
2098   mat->assembled    = PETSC_TRUE;
2099   mat->insertmode   = NOT_SET_VALUES;
2100 
2101   a      = (Mat_MPISBAIJ*)mat->data;
2102   a->bs2 = oldmat->bs2;
2103   a->mbs = oldmat->mbs;
2104   a->nbs = oldmat->nbs;
2105   a->Mbs = oldmat->Mbs;
2106   a->Nbs = oldmat->Nbs;
2107 
2108 
2109   a->size         = oldmat->size;
2110   a->rank         = oldmat->rank;
2111   a->donotstash   = oldmat->donotstash;
2112   a->roworiented  = oldmat->roworiented;
2113   a->rowindices   = 0;
2114   a->rowvalues    = 0;
2115   a->getrowactive = PETSC_FALSE;
2116   a->barray       = 0;
2117   a->rstartbs     = oldmat->rstartbs;
2118   a->rendbs       = oldmat->rendbs;
2119   a->cstartbs     = oldmat->cstartbs;
2120   a->cendbs       = oldmat->cendbs;
2121 
2122   /* hash table stuff */
2123   a->ht           = 0;
2124   a->hd           = 0;
2125   a->ht_size      = 0;
2126   a->ht_flag      = oldmat->ht_flag;
2127   a->ht_fact      = oldmat->ht_fact;
2128   a->ht_total_ct  = 0;
2129   a->ht_insert_ct = 0;
2130 
2131   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2132   if (oldmat->colmap) {
2133 #if defined(PETSC_USE_CTABLE)
2134     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2135 #else
2136     ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
2137     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2138     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2139 #endif
2140   } else a->colmap = 0;
2141 
2142   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2143     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2144     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2145     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2146   } else a->garray = 0;
2147 
2148   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2149   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2151   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2153 
2154   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2156   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2158 
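  /* slvec1a/slvec1b (and slvec0b) are not independent vectors: they are
     sequential views into the first bs*mbs entries and the trailing entries of
     the arrays of slvec1 and slvec0, created below with VecCreateSeqWithArray(). */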
2159   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2160   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2161   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2162   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2163   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2164   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2165   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2166   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2168   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2171   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2172 
2173   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2174   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2175   a->sMvctx = oldmat->sMvctx;
2176   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2177 
2178   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2179   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2180   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2181   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2182   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2183   *newmat = mat;
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 #undef __FUNCT__
2188 #define __FUNCT__ "MatLoad_MPISBAIJ"
2189 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2190 {
2191   PetscErrorCode ierr;
2192   PetscInt       i,nz,j,rstart,rend;
2193   PetscScalar    *vals,*buf;
2194   MPI_Comm       comm;
2195   MPI_Status     status;
2196   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2197   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2198   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2199   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2200   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2201   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2202   int            fd;
2203 
2204   PetscFunctionBegin;
2205   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2206   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2207   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2208   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2209   if (bs < 0) bs = 1;
2210 
2211   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2212   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2213   if (!rank) {
2214     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2215     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2216     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2217     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2218   }
2219 
2220   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2221 
2222   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2223   M    = header[1];
2224   N    = header[2];
2225 
2226   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2227   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2228   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2229 
2230   /* If global sizes are set, check if they are consistent with that given in the file */
2231   if (sizesset) {
2232     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2233   }
2234   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2235   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2236 
2237   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2238 
2239   /*
2240      This code adds extra rows to make sure the number of rows is
2241      divisible by the blocksize
2242   */
2243   Mbs        = M/bs;
2244   extra_rows = bs - M + bs*(Mbs);
2245   if (extra_rows == bs) extra_rows = 0;
2246   else                  Mbs++;
2247   if (extra_rows && !rank) {
2248     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2249   }
2250 
2251   /* determine ownership of all rows */
2252   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2253     mbs = Mbs/size + ((Mbs % size) > rank);
2254     m   = mbs*bs;
2255   } else { /* User Set */
2256     m   = newmat->rmap->n;
2257     mbs = m/bs;
2258   }
2259   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2260   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2261   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2262   rowners[0] = 0;
2263   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2264   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2265   rstart = rowners[rank];
2266   rend   = rowners[rank+1];
2267 
2268   /* distribute row lengths to all processors */
2269   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2270   if (!rank) {
2271     ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
2272     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2273     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2274     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2275     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2276     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2277     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2278   } else {
2279     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2280   }
2281 
2282   if (!rank) {   /* procs[0] */
2283     /* calculate the number of nonzeros on each processor */
2284     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2285     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2286     for (i=0; i<size; i++) {
2287       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2288         procsnz[i] += rowlengths[j];
2289       }
2290     }
2291     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2292 
2293     /* determine max buffer needed and allocate it */
2294     maxnz = 0;
2295     for (i=0; i<size; i++) {
2296       maxnz = PetscMax(maxnz,procsnz[i]);
2297     }
2298     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2299 
2300     /* read in my part of the matrix column indices  */
2301     nz     = procsnz[0];
2302     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2303     mycols = ibuf;
2304     if (size == 1) nz -= extra_rows;
2305     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2306     if (size == 1) {
2307       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2308     }
2309 
2310     /* read in the column indices for the other procs (except the last) and ship them off */
2311     for (i=1; i<size-1; i++) {
2312       nz   = procsnz[i];
2313       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2314       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2315     }
2316     /* read in the stuff for the last proc */
2317     if (size != 1) {
2318       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2319       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2320       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2321       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2322     }
2323     ierr = PetscFree(cols);CHKERRQ(ierr);
2324   } else {  /* procs[i], i>0 */
2325     /* determine buffer space needed for message */
2326     nz = 0;
2327     for (i=0; i<m; i++) nz += locrowlens[i];
2328     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2329     mycols = ibuf;
2330     /* receive message of column indices */
2331     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2332     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2333     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2334   }
2335 
2336   /* loop over local rows, determining number of off diagonal entries */
2337   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2338   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2339   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2340   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2341   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
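  /* mask[] flags block columns already seen in the current block row (so each
     block is counted once); masked1[]/masked2[] record which diagonal and
     off-diagonal flags were set so only those entries are cleared afterwards. */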
2342   rowcount = 0;
2343   nzcount  = 0;
2344   for (i=0; i<mbs; i++) {
2345     dcount  = 0;
2346     odcount = 0;
2347     for (j=0; j<bs; j++) {
2348       kmax = locrowlens[rowcount];
2349       for (k=0; k<kmax; k++) {
2350         tmp = mycols[nzcount++]/bs; /* block col. index */
2351         if (!mask[tmp]) {
2352           mask[tmp] = 1;
2353           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2354           else masked1[dcount++] = tmp; /* entry in diag portion */
2355         }
2356       }
2357       rowcount++;
2358     }
2359 
2360     dlens[i]  = dcount;  /* d_nnz[i] */
2361     odlens[i] = odcount; /* o_nnz[i] */
2362 
2363     /* zero out the mask elements we set */
2364     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2365     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2366   }
2367   if (!sizesset) {
2368     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2369   }
2370   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2371   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2372 
2373   if (!rank) {
2374     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2375     /* read in my part of the matrix numerical values  */
2376     nz     = procsnz[0];
2377     vals   = buf;
2378     mycols = ibuf;
2379     if (size == 1) nz -= extra_rows;
2380     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2381     if (size == 1) {
2382       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2383     }
2384 
2385     /* insert into matrix */
2386     jj = rstart*bs;
2387     for (i=0; i<m; i++) {
2388       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2389       mycols += locrowlens[i];
2390       vals   += locrowlens[i];
2391       jj++;
2392     }
2393 
2394     /* read in other processors (except the last one) and ship out */
2395     for (i=1; i<size-1; i++) {
2396       nz   = procsnz[i];
2397       vals = buf;
2398       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2399       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2400     }
2401     /* the last proc */
2402     if (size != 1) {
2403       nz   = procsnz[size-1] - extra_rows; /* i == size-1 here; the extra rows are not on the disk */
2404       vals = buf;
2405       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2406       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2407       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2408     }
2409     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2410 
2411   } else {
2412     /* receive numeric values */
2413     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2414 
2415     /* receive message of values */
2416     vals   = buf;
2417     mycols = ibuf;
2418     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2419     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2420     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2421 
2422     /* insert into matrix */
2423     jj = rstart*bs;
2424     for (i=0; i<m; i++) {
2425       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2426       mycols += locrowlens[i];
2427       vals   += locrowlens[i];
2428       jj++;
2429     }
2430   }
2431 
2432   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2433   ierr = PetscFree(buf);CHKERRQ(ierr);
2434   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2435   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2436   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2437   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2438   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2439   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2445 /*XXXXX@
2446    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2447 
2448    Input Parameters:
2449 .  mat  - the matrix
2450 .  fact - factor
2451 
2452    Not Collective on Mat, each process can have a different hash factor
2453 
2454    Level: advanced
2455 
2456   Notes:
2457    This can also be set by the command line option: -mat_use_hash_table fact
2458 
2459 .keywords: matrix, hashtable, factor, HT
2460 
2461 .seealso: MatSetOption()
2462 @XXXXX*/
2463 
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2467 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2468 {
2469   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2470   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2471   PetscReal      atmp;
2472   PetscReal      *work,*svalues,*rvalues;
2473   PetscErrorCode ierr;
2474   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2475   PetscMPIInt    rank,size;
2476   PetscInt       *rowners_bs,dest,count,source;
2477   PetscScalar    *va;
2478   MatScalar      *ba;
2479   MPI_Status     stat;
2480 
2481   PetscFunctionBegin;
2482   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2483   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2484   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2485 
2486   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2487   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2488 
2489   bs  = A->rmap->bs;
2490   mbs = a->mbs;
2491   Mbs = a->Mbs;
2492   ba  = b->a;
2493   bi  = b->i;
2494   bj  = b->j;
2495 
2496   /* find ownerships */
2497   rowners_bs = A->rmap->range;
2498 
2499   /* each proc creates an array to be distributed */
2500   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2501   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2502 
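  /* Since only the upper triangle is stored, an entry of B in local row i and
     global column c also belongs (by symmetry) to row c, which is owned by a
     later process; those contributions are accumulated in work[] and shipped
     to their owners below. */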
2503   /* row_max for B */
2504   if (rank != size-1) {
2505     for (i=0; i<mbs; i++) {
2506       ncols = bi[1] - bi[0]; bi++;
2507       brow  = bs*i;
2508       for (j=0; j<ncols; j++) {
2509         bcol = bs*(*bj);
2510         for (kcol=0; kcol<bs; kcol++) {
2511           col  = bcol + kcol;                /* local col index */
2512           col += rowners_bs[rank+1];      /* global col index */
2513           for (krow=0; krow<bs; krow++) {
2514             atmp = PetscAbsScalar(*ba); ba++;
2515             row  = brow + krow;   /* local row index */
2516             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2517             if (work[col] < atmp) work[col] = atmp;
2518           }
2519         }
2520         bj++;
2521       }
2522     }
2523 
2524     /* send values to its owners */
2525     for (dest=rank+1; dest<size; dest++) {
2526       svalues = work + rowners_bs[dest];
2527       count   = rowners_bs[dest+1]-rowners_bs[dest];
2528       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2529     }
2530   }
2531 
2532   /* receive values */
2533   if (rank) {
2534     rvalues = work;
2535     count   = rowners_bs[rank+1]-rowners_bs[rank];
2536     for (source=0; source<rank; source++) {
2537       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2538       /* process values */
2539       for (i=0; i<count; i++) {
2540         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2541       }
2542     }
2543   }
2544 
2545   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2546   ierr = PetscFree(work);CHKERRQ(ierr);
2547   PetscFunctionReturn(0);
2548 }
2549 
2550 #undef __FUNCT__
2551 #define __FUNCT__ "MatSOR_MPISBAIJ"
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* lower triangular part: slvec0b = B^T*xx (negated below) */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = slvec1a + B*slvec1b (= bb - B*xx, since slvec0 was negated) */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* compute slvec1a = bb + scale*D*xx, matching the hand-coded branch below;
         the diagonal-block product must land in slvec1a before the VecAYPX() */
      ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
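
/*
   How this kernel is typically reached (an illustrative sketch, not part of
   this file): SOR/SSOR preconditioning of an MPISBAIJ matrix with local
   symmetric sweeps, e.g.

     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   or -pc_type sor -pc_sor_local_symmetric on the command line; the block
   size of the matrix must be 1 (bs > 1 is rejected above).
*/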

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
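/*
   Variant of MatSOR_MPISBAIJ using the two-communication scheme (descriptive
   comment added for exposition): the ghost values of xx travel through the
   ordinary Mvctx/lvec forward scatter, and the transposed contributions
   -B^T*xx are returned to their owners with a reverse scatter, instead of
   the merged slvec0/slvec1 scatter used above.
*/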
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
         the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is currently supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine.

       The i and j indices are 0 based; i is of length m+1 and holds the offsets into the j
     (and a) arrays at which each local row starts, so i[0] must be 0.
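
   Example (an illustrative single-process sketch with bs = 1; since SBAIJ stores only the
   upper triangle, entries with j >= i are supplied for the symmetric tridiagonal matrix
   with 2 on the diagonal and -1 on the off-diagonals):
.vb
   PetscInt    i[] = {0,2,4,5};
   PetscInt    j[] = {0,1,1,2,2};
   PetscScalar a[] = {2,-1,2,-1,2};
   Mat         A;
   ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve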

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(),
          MatMPISBAIJSetPreallocationCSR(), MatCreateSBAIJ(), MATMPISBAIJ
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row, with only the upper triangular portion stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix
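
   Example (an illustrative sketch of the intended calling sequence; m, n, bs, and the CSR
   arrays i, j, v are assumed to be set up by the caller):
.vb
   ierr = MatCreate(comm,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
.ve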

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateSBAIJ(), MATMPISBAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPISBAIJ"
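/*
   Concatenates the sequential SBAIJ matrix inmat owned by each process into a
   single parallel MPISBAIJ matrix *outmat: a symbolic phase determines the
   blocked sparsity pattern and preallocates, then a numeric phase copies the
   values row by row.  Reached through MatCreateMPIMatConcatenateSeqMat().
   (Descriptive comment added for exposition.)
*/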
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
  PetscInt       *indx;
  PetscScalar    *values;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)inmat->data;
    PetscInt       *dnz,*onz,sum,mbs,Nbs;
    PetscInt       *bindx,rmax=a->rmax,j;

    ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
    mbs = m/bs; Nbs = N/cbs;
    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
    }
    /* Check sum(n) = Nbs */
    ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",Nbs);

    ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart -= mbs;

    ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
    ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<mbs; i++) {
      ierr = MatGetRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
      nnz = nnz/bs;
      for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
      ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
      ierr = MatRestoreRow_SeqSBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
    }
    ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscFree(bindx);CHKERRQ(ierr);

    ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
    ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(*outmat,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
    ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  }

  /* numeric phase */
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);

  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
    Ii   = i + rstart;
    ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqSBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
  }
  ierr = MatSetOption(inmat,MAT_GETROW_UPPERTRIANGULAR,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}