xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c5b2ea42cd4070151795b0df3c8c47a133418b2b)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if(!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If NumCol = 1 then a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -=bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend){
380           col = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined (PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data --;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else {
400       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
401     }
402   }
403  PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatNorm_MPISBAIJ"
408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
409 {
410   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscReal      sum[2],*lnorm2;
413 
414   PetscFunctionBegin;
415   if (baij->size == 1) {
416     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
417   } else {
418     if (type == NORM_FROBENIUS) {
419       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
420       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
422       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
423       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
424       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
425       *norm = sqrt(sum[0] + 2*sum[1]);
426       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
427     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
428       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
429       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
430       PetscReal    *rsum,*rsum2,vabs;
431       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
432       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
433       MatScalar    *v;
434 
435       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
436       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
437       /* Amat */
438       v = amat->a; jj = amat->j;
439       for (brow=0; brow<mbs; brow++) {
440         grow = bs*(rstart + brow);
441         nz = amat->i[brow+1] - amat->i[brow];
442         for (bcol=0; bcol<nz; bcol++){
443           gcol = bs*(rstart + *jj); jj++;
444           for (col=0; col<bs; col++){
445             for (row=0; row<bs; row++){
446               vabs = PetscAbsScalar(*v); v++;
447               rsum[gcol+col] += vabs;
448               /* non-diagonal block */
449               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
450             }
451           }
452         }
453       }
454       /* Bmat */
455       v = bmat->a; jj = bmat->j;
456       for (brow=0; brow<mbs; brow++) {
457         grow = bs*(rstart + brow);
458         nz = bmat->i[brow+1] - bmat->i[brow];
459         for (bcol=0; bcol<nz; bcol++){
460           gcol = bs*garray[*jj]; jj++;
461           for (col=0; col<bs; col++){
462             for (row=0; row<bs; row++){
463               vabs = PetscAbsScalar(*v); v++;
464               rsum[gcol+col] += vabs;
465               rsum[grow+row] += vabs;
466             }
467           }
468         }
469       }
470       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
471       *norm = 0.0;
472       for (col=0; col<mat->cmap->N; col++) {
473         if (rsum2[col] > *norm) *norm = rsum2[col];
474       }
475       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
476     } else {
477       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash || mat->nooffprocentries) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processors are either in INSERTMODE or ADDMODE */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
500   mat->insertmode = addv; /* in case this processor had no cache */
501 
502   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
503   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
514 {
515   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
516   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
517   PetscErrorCode ierr;
518   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
519   PetscInt       *row,*col;
520   PetscBool      other_disassembled;
521   PetscMPIInt    n;
522   PetscBool      r1,r2,r3;
523   MatScalar      *val;
524   InsertMode     addv = mat->insertmode;
525 
526   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
527   PetscFunctionBegin;
528 
529   if (!baij->donotstash &&  !mat->nooffprocentries) {
530     while (1) {
531       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
532       if (!flg) break;
533 
534       for (i=0; i<n;) {
535         /* Now identify the consecutive vals belonging to the same row */
536         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
537         if (j < n) ncols = j-i;
538         else       ncols = n-i;
539         /* Now assemble all these values with a single function call */
540         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
541         i = j;
542       }
543     }
544     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
545     /* Now process the block-stash. Since the values are stashed column-oriented,
546        set the roworiented flag to column oriented, and after MatSetValues()
547        restore the original flags */
548     r1 = baij->roworiented;
549     r2 = a->roworiented;
550     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
551     baij->roworiented = PETSC_FALSE;
552     a->roworiented    = PETSC_FALSE;
553     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
554     while (1) {
555       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
556       if (!flg) break;
557 
558       for (i=0; i<n;) {
559         /* Now identify the consecutive vals belonging to the same row */
560         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
561         if (j < n) ncols = j-i;
562         else       ncols = n-i;
563         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
564         i = j;
565       }
566     }
567     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
597   baij->rowvalues = 0;
598 
599   PetscFunctionReturn(0);
600 }
601 
602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
603 #undef __FUNCT__
604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
606 {
607   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
608   PetscErrorCode    ierr;
609   PetscInt          bs = mat->rmap->bs;
610   PetscMPIInt       size = baij->size,rank = baij->rank;
611   PetscBool         iascii,isdraw;
612   PetscViewer       sviewer;
613   PetscViewerFormat format;
614 
615   PetscFunctionBegin;
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
618   if (iascii) {
619     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621       MatInfo info;
622       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
623       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
633       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
639       PetscFunctionReturn(0);
640     }
641   }
642 
643   if (isdraw) {
644     PetscDraw  draw;
645     PetscBool  isnull;
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
648   }
649 
650   if (size == 1) {
651     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
652     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
653   } else {
654     /* assemble the entire matrix onto first processor. */
655     Mat          A;
656     Mat_SeqSBAIJ *Aloc;
657     Mat_SeqBAIJ  *Bloc;
658     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
659     MatScalar    *a;
660 
661     /* Should this be the same type as mat? */
662     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
669     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
670     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
671 
672     /* copy over the A part */
673     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
674     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
675     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
676 
677     for (i=0; i<mbs; i++) {
678       rvals[0] = bs*(baij->rstartbs + i);
679       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
680       for (j=ai[i]; j<ai[i+1]; j++) {
681         col = (baij->cstartbs+aj[j])*bs;
682         for (k=0; k<bs; k++) {
683           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
684           col++; a += bs;
685         }
686       }
687     }
688     /* copy over the B part */
689     Bloc = (Mat_SeqBAIJ*)baij->B->data;
690     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
691     for (i=0; i<mbs; i++) {
692 
693       rvals[0] = bs*(baij->rstartbs + i);
694       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
695       for (j=ai[i]; j<ai[i+1]; j++) {
696         col = baij->garray[aj[j]]*bs;
697         for (k=0; k<bs; k++) {
698           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
699           col++; a += bs;
700         }
701       }
702     }
703     ierr = PetscFree(rvals);CHKERRQ(ierr);
704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     /*
707        Everyone has to call to draw the matrix since the graphics waits are
708        synchronized across all processors that share the PetscDraw object
709     */
710     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
713           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
714       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscBool      iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatDestroy_MPISBAIJ"
745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751 #if defined(PETSC_USE_LOG)
752   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
753 #endif
754   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
755   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
756   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
757   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
758 #if defined (PETSC_USE_CTABLE)
759   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
760 #else
761   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
762 #endif
763   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
764   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
765   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
766   if (baij->slvec0) {
767     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
768     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
769   }
770   if (baij->slvec1) {
771     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
772     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
773     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
774   }
775   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
778   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
779   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
780   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
781   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
782 #if defined(PETSC_USE_REAL_MAT_SINGLE)
783   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
784 #endif
785   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
788   ierr = PetscFree(baij);CHKERRQ(ierr);
789 
790   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* supperdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* supperdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do supperdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893 
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
899 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
900 {
901   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
902   PetscErrorCode ierr;
903   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
904   PetscScalar    *x,*from,zero=0.0;
905 
906   PetscFunctionBegin;
907   /*
908   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
909   PetscSynchronizedFlush(((PetscObject)A)->comm);
910   */
911   /* diagonal part */
912   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
913   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
914 
915   /* subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
917 
918   /* copy x into the vec slvec0 */
919   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
920   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
921   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
922   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
923 
924   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927 
928   /* supperdiagonal part */
929   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
930 
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   /* do diagonal part */
944   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
945   /* do supperdiagonal part */
946   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
948 
949   /* do subdiagonal part */
950   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
951   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953 
954   PetscFunctionReturn(0);
955 }
956 
957 /*
958   This only works correctly for square matrices where the subblock A->A is the
959    diagonal block
960 */
961 #undef __FUNCT__
962 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
963 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
964 {
965   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
970   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatScale_MPISBAIJ"
976 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
977 {
978   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
983   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatGetRow_MPISBAIJ"
989 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
990 {
991   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
992   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
993   PetscErrorCode ierr;
994   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
995   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
996   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
997 
998   PetscFunctionBegin;
999   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1000   mat->getrowactive = PETSC_TRUE;
1001 
1002   if (!mat->rowvalues && (idx || v)) {
1003     /*
1004         allocate enough space to hold information from the longest row.
1005     */
1006     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1007     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1008     PetscInt     max = 1,mbs = mat->mbs,tmp;
1009     for (i=0; i<mbs; i++) {
1010       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1011       if (max < tmp) { max = tmp; }
1012     }
1013     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1014   }
1015 
1016   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1017   lrow = row - brstart;  /* local row index */
1018 
1019   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1020   if (!v)   {pvA = 0; pvB = 0;}
1021   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1022   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1023   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1024   nztot = nzA + nzB;
1025 
1026   cmap  = mat->garray;
1027   if (v  || idx) {
1028     if (nztot) {
1029       /* Sort by increasing column numbers, assuming A and B already sorted */
1030       PetscInt imark = -1;
1031       if (v) {
1032         *v = v_p = mat->rowvalues;
1033         for (i=0; i<nzB; i++) {
1034           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1035           else break;
1036         }
1037         imark = i;
1038         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1039         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1040       }
1041       if (idx) {
1042         *idx = idx_p = mat->rowindices;
1043         if (imark > -1) {
1044           for (i=0; i<imark; i++) {
1045             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1046           }
1047         } else {
1048           for (i=0; i<nzB; i++) {
1049             if (cmap[cworkB[i]/bs] < cstart)
1050               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1051             else break;
1052           }
1053           imark = i;
1054         }
1055         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1056         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1057       }
1058     } else {
1059       if (idx) *idx = 0;
1060       if (v)   *v   = 0;
1061     }
1062   }
1063   *nz = nztot;
1064   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1065   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1071 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1072 {
1073   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1074 
1075   PetscFunctionBegin;
1076   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   baij->getrowactive = PETSC_FALSE;
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1083 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1084 {
1085   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1086   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1087 
1088   PetscFunctionBegin;
1089   aA->getrow_utriangular = PETSC_TRUE;
1090   PetscFunctionReturn(0);
1091 }
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1094 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1095 {
1096   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1097   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1098 
1099   PetscFunctionBegin;
1100   aA->getrow_utriangular = PETSC_FALSE;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1106 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1107 {
1108   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1113   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1119 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1120 {
1121   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1126   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1132 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1133 {
1134   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1139   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1145 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1146 {
1147   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1148   Mat            A = a->A,B = a->B;
1149   PetscErrorCode ierr;
1150   PetscReal      isend[5],irecv[5];
1151 
1152   PetscFunctionBegin;
1153   info->block_size     = (PetscReal)matin->rmap->bs;
1154   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1168     info->nz_used      = irecv[0];
1169     info->nz_allocated = irecv[1];
1170     info->nz_unneeded  = irecv[2];
1171     info->memory       = irecv[3];
1172     info->mallocs      = irecv[4];
1173   } else if (flag == MAT_GLOBAL_SUM) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   } else {
1181     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1182   }
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1192 {
1193   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1194   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NEW_NONZERO_LOCATIONS:
1200   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1201   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1202   case MAT_KEEP_NONZERO_PATTERN:
1203   case MAT_NEW_NONZERO_LOCATION_ERR:
1204     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1206     break;
1207   case MAT_ROW_ORIENTED:
1208     a->roworiented = flg;
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_NEW_DIAGONALS:
1213     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1214     break;
1215   case MAT_IGNORE_OFF_PROC_ENTRIES:
1216     a->donotstash = flg;
1217     break;
1218   case MAT_USE_HASH_TABLE:
1219     a->ht_flag = flg;
1220     break;
1221   case MAT_HERMITIAN:
1222     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1223     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1224     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1225     break;
1226   case MAT_SPD:
1227     A->spd_set                         = PETSC_TRUE;
1228     A->spd                             = flg;
1229     if (flg) {
1230       A->symmetric                     = PETSC_TRUE;
1231       A->structurally_symmetric        = PETSC_TRUE;
1232       A->symmetric_set                 = PETSC_TRUE;
1233       A->structurally_symmetric_set    = PETSC_TRUE;
1234     }
1235     break;
1236   case MAT_SYMMETRIC:
1237     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1238     break;
1239   case MAT_STRUCTURALLY_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1241     break;
1242   case MAT_SYMMETRY_ETERNAL:
1243     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1244     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1245     break;
1246   case MAT_IGNORE_LOWER_TRIANGULAR:
1247     aA->ignore_ltriangular = flg;
1248     break;
1249   case MAT_ERROR_LOWER_TRIANGULAR:
1250     aA->ignore_ltriangular = flg;
1251     break;
1252   case MAT_GETROW_UPPERTRIANGULAR:
1253     aA->getrow_utriangular = flg;
1254     break;
1255   default:
1256     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1263 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1264 {
1265   PetscErrorCode ierr;
1266   PetscFunctionBegin;
1267   if (MAT_INITIAL_MATRIX || *B != A) {
1268     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1269   }
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1275 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1276 {
1277   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1278   Mat            a=baij->A, b=baij->B;
1279   PetscErrorCode ierr;
1280   PetscInt       nv,m,n;
1281   PetscBool      flg;
1282 
1283   PetscFunctionBegin;
1284   if (ll != rr){
1285     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1286     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1287   }
1288   if (!ll) PetscFunctionReturn(0);
1289 
1290   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1291   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1292 
1293   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1294   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1295 
1296   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1297 
1298   /* left diagonalscale the off-diagonal part */
1299   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1300 
1301   /* scale the diagonal part */
1302   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1303 
1304   /* right diagonalscale the off-diagonal part */
1305   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1306   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscBool      flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1368 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1380 {
1381   PetscErrorCode ierr;
1382   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1383   PetscBLASInt   bnz,one=1;
1384   Mat_SeqSBAIJ   *xa,*ya;
1385   Mat_SeqBAIJ    *xb,*yb;
1386 
1387   PetscFunctionBegin;
1388   if (str == SAME_NONZERO_PATTERN) {
1389     PetscScalar alpha = a;
1390     xa = (Mat_SeqSBAIJ *)xx->A->data;
1391     ya = (Mat_SeqSBAIJ *)yy->A->data;
1392     bnz = PetscBLASIntCast(xa->nz);
1393     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1394     xb = (Mat_SeqBAIJ *)xx->B->data;
1395     yb = (Mat_SeqBAIJ *)yy->B->data;
1396     bnz = PetscBLASIntCast(xb->nz);
1397     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1398   } else {
1399     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1400     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1401     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1408 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1409 {
1410   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1411   PetscInt        rbs,cbs;
1412   PetscErrorCode  ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1416   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1418   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1419   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1420   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1426 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1427 {
1428   PetscErrorCode ierr;
1429   PetscInt       i;
1430   PetscBool      flg;
1431 
1432   PetscFunctionBegin;
1433   for (i=0; i<n; i++) {
1434     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1435     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1436   }
1437   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 
1442 /* -------------------------------------------------------------------*/
1443 static struct _MatOps MatOps_Values = {
1444        MatSetValues_MPISBAIJ,
1445        MatGetRow_MPISBAIJ,
1446        MatRestoreRow_MPISBAIJ,
1447        MatMult_MPISBAIJ,
1448 /* 4*/ MatMultAdd_MPISBAIJ,
1449        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1450        MatMultAdd_MPISBAIJ,
1451        0,
1452        0,
1453        0,
1454 /*10*/ 0,
1455        0,
1456        0,
1457        MatSOR_MPISBAIJ,
1458        MatTranspose_MPISBAIJ,
1459 /*15*/ MatGetInfo_MPISBAIJ,
1460        MatEqual_MPISBAIJ,
1461        MatGetDiagonal_MPISBAIJ,
1462        MatDiagonalScale_MPISBAIJ,
1463        MatNorm_MPISBAIJ,
1464 /*20*/ MatAssemblyBegin_MPISBAIJ,
1465        MatAssemblyEnd_MPISBAIJ,
1466        MatSetOption_MPISBAIJ,
1467        MatZeroEntries_MPISBAIJ,
1468 /*24*/ 0,
1469        0,
1470        0,
1471        0,
1472        0,
1473 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1474        0,
1475        0,
1476        0,
1477        0,
1478 /*34*/ MatDuplicate_MPISBAIJ,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*39*/ MatAXPY_MPISBAIJ,
1484        MatGetSubMatrices_MPISBAIJ,
1485        MatIncreaseOverlap_MPISBAIJ,
1486        MatGetValues_MPISBAIJ,
1487        MatCopy_MPISBAIJ,
1488 /*44*/ 0,
1489        MatScale_MPISBAIJ,
1490        0,
1491        0,
1492        0,
1493 /*49*/ MatSetBlockSize_MPISBAIJ,
1494        0,
1495        0,
1496        0,
1497        0,
1498 /*54*/ 0,
1499        0,
1500        MatSetUnfactored_MPISBAIJ,
1501        0,
1502        MatSetValuesBlocked_MPISBAIJ,
1503 /*59*/ 0,
1504        0,
1505        0,
1506        0,
1507        0,
1508 /*64*/ 0,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*74*/ 0,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*79*/ 0,
1524        0,
1525        0,
1526        0,
1527        MatLoad_MPISBAIJ,
1528 /*84*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*89*/ 0,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*94*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*99*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*104*/0,
1549        MatRealPart_MPISBAIJ,
1550        MatImaginaryPart_MPISBAIJ,
1551        MatGetRowUpperTriangular_MPISBAIJ,
1552        MatRestoreRowUpperTriangular_MPISBAIJ,
1553 /*109*/0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*114*/0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*119*/0,
1564        0,
1565        0,
1566        0
1567 };
1568 
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1573 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
1574 {
1575   PetscFunctionBegin;
1576   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1577   *iscopy = PETSC_FALSE;
1578   PetscFunctionReturn(0);
1579 }
1580 EXTERN_C_END
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1585 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1586 {
1587   Mat_MPISBAIJ   *b;
1588   PetscErrorCode ierr;
1589   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1590 
1591   PetscFunctionBegin;
1592   if (bs < 0){
1593     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1594       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1595     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1596     bs   = PetscAbs(bs);
1597   }
1598   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1599   bs = newbs;
1600 
1601   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1602   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1603   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1604   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1605 
1606   B->rmap->bs = B->cmap->bs = bs;
1607   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1608   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1609 
1610   if (d_nnz) {
1611     for (i=0; i<B->rmap->n/bs; i++) {
1612       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1613     }
1614   }
1615   if (o_nnz) {
1616     for (i=0; i<B->rmap->n/bs; i++) {
1617       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1618     }
1619   }
1620 
1621   b   = (Mat_MPISBAIJ*)B->data;
1622   mbs = B->rmap->n/bs;
1623   Mbs = B->rmap->N/bs;
1624   if (mbs*bs != B->rmap->n) {
1625     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1626   }
1627 
1628   B->rmap->bs  = bs;
1629   b->bs2 = bs*bs;
1630   b->mbs = mbs;
1631   b->nbs = mbs;
1632   b->Mbs = Mbs;
1633   b->Nbs = Mbs;
1634 
1635   for (i=0; i<=b->size; i++) {
1636     b->rangebs[i] = B->rmap->range[i]/bs;
1637   }
1638   b->rstartbs = B->rmap->rstart/bs;
1639   b->rendbs   = B->rmap->rend/bs;
1640 
1641   b->cstartbs = B->cmap->rstart/bs;
1642   b->cendbs   = B->cmap->rend/bs;
1643 
1644   if (!B->preallocated) {
1645     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1646     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1647     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1648     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1649     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1651     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1652     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1653     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1654   }
1655 
1656   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1657   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1658   B->preallocated = PETSC_TRUE;
1659   PetscFunctionReturn(0);
1660 }
1661 EXTERN_C_END
1662 
1663 EXTERN_C_BEGIN
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1666 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1667 {
1668   PetscInt       m,rstart,cstart,cend;
1669   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1670   const PetscInt *JJ=0;
1671   PetscScalar    *values=0;
1672   PetscErrorCode ierr;
1673 
1674   PetscFunctionBegin;
1675 
1676   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1677   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1678   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1679   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1680   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1681   m      = B->rmap->n/bs;
1682   rstart = B->rmap->rstart/bs;
1683   cstart = B->cmap->rstart/bs;
1684   cend   = B->cmap->rend/bs;
1685 
1686   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1687   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1688   for (i=0; i<m; i++) {
1689     nz = ii[i+1] - ii[i];
1690     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1691     nz_max = PetscMax(nz_max,nz);
1692     JJ  = jj + ii[i];
1693     for (j=0; j<nz; j++) {
1694       if (*JJ >= cstart) break;
1695       JJ++;
1696     }
1697     d = 0;
1698     for (; j<nz; j++) {
1699       if (*JJ++ >= cend) break;
1700       d++;
1701     }
1702     d_nnz[i] = d;
1703     o_nnz[i] = nz - d;
1704   }
1705   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1706   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1707 
1708   values = (PetscScalar*)V;
1709   if (!values) {
1710     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1711     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1712   }
1713   for (i=0; i<m; i++) {
1714     PetscInt          row    = i + rstart;
1715     PetscInt          ncols  = ii[i+1] - ii[i];
1716     const PetscInt    *icols = jj + ii[i];
1717     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1718     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1719   }
1720 
1721   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1722   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1724 
1725   PetscFunctionReturn(0);
1726 }
1727 EXTERN_C_END
1728 
1729 EXTERN_C_BEGIN
1730 #if defined(PETSC_HAVE_MUMPS)
1731 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1732 #endif
1733 #if defined(PETSC_HAVE_SPOOLES)
1734 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1735 #endif
1736 #if defined(PETSC_HAVE_PASTIX)
1737 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1738 #endif
1739 EXTERN_C_END
1740 
1741 /*MC
1742    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1743    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1744 
1745    Options Database Keys:
1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateMPISBAIJ
1751 M*/
1752 
1753 EXTERN_C_BEGIN
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatCreate_MPISBAIJ"
1756 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1757 {
1758   Mat_MPISBAIJ   *b;
1759   PetscErrorCode ierr;
1760   PetscBool      flg;
1761 
1762   PetscFunctionBegin;
1763 
1764   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1765   B->data = (void*)b;
1766   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1767 
1768   B->ops->destroy    = MatDestroy_MPISBAIJ;
1769   B->ops->view       = MatView_MPISBAIJ;
1770   B->assembled       = PETSC_FALSE;
1771 
1772   B->insertmode = NOT_SET_VALUES;
1773   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1774   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1775 
1776   /* build local table of row and column ownerships */
1777   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1778 
1779   /* build cache for off array entries formed */
1780   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1781   b->donotstash  = PETSC_FALSE;
1782   b->colmap      = PETSC_NULL;
1783   b->garray      = PETSC_NULL;
1784   b->roworiented = PETSC_TRUE;
1785 
1786   /* stuff used in block assembly */
1787   b->barray       = 0;
1788 
1789   /* stuff used for matrix vector multiply */
1790   b->lvec         = 0;
1791   b->Mvctx        = 0;
1792   b->slvec0       = 0;
1793   b->slvec0b      = 0;
1794   b->slvec1       = 0;
1795   b->slvec1a      = 0;
1796   b->slvec1b      = 0;
1797   b->sMvctx       = 0;
1798 
1799   /* stuff for MatGetRow() */
1800   b->rowindices   = 0;
1801   b->rowvalues    = 0;
1802   b->getrowactive = PETSC_FALSE;
1803 
1804   /* hash table stuff */
1805   b->ht           = 0;
1806   b->hd           = 0;
1807   b->ht_size      = 0;
1808   b->ht_flag      = PETSC_FALSE;
1809   b->ht_fact      = 0;
1810   b->ht_total_ct  = 0;
1811   b->ht_insert_ct = 0;
1812 
1813   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1814   b->ijonly       = PETSC_FALSE;
1815 
1816   b->in_loc       = 0;
1817   b->v_loc        = 0;
1818   b->n_loc        = 0;
1819   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1820     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1821     if (flg) {
1822       PetscReal fact = 1.39;
1823       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1824       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1825       if (fact <= 1.0) fact = 1.39;
1826       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1827       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1828     }
1829   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1830 
1831 #if defined(PETSC_HAVE_PASTIX)
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1833 					   "MatGetFactor_mpisbaij_pastix",
1834 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1835 #endif
1836 #if defined(PETSC_HAVE_MUMPS)
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1838                                      "MatGetFactor_sbaij_mumps",
1839                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1840 #endif
1841 #if defined(PETSC_HAVE_SPOOLES)
1842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1843                                      "MatGetFactor_mpisbaij_spooles",
1844                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1845 #endif
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1847                                      "MatStoreValues_MPISBAIJ",
1848                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1850                                      "MatRetrieveValues_MPISBAIJ",
1851                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1853                                      "MatGetDiagonalBlock_MPISBAIJ",
1854                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1856                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1857                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1858   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1859                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1860                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1861   B->symmetric                  = PETSC_TRUE;
1862   B->structurally_symmetric     = PETSC_TRUE;
1863   B->symmetric_set              = PETSC_TRUE;
1864   B->structurally_symmetric_set = PETSC_TRUE;
1865   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 EXTERN_C_END
1869 
1870 /*MC
1871    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1872 
1873    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1874    and MATMPISBAIJ otherwise.
1875 
1876    Options Database Keys:
1877 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1878 
1879   Level: beginner
1880 
1881 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1882 M*/
1883 
1884 EXTERN_C_BEGIN
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatCreate_SBAIJ"
1887 PetscErrorCode  MatCreate_SBAIJ(Mat A)
1888 {
1889   PetscErrorCode ierr;
1890   PetscMPIInt    size;
1891 
1892   PetscFunctionBegin;
1893   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1894   if (size == 1) {
1895     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1896   } else {
1897     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 EXTERN_C_END
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1905 /*@C
1906    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1907    the user should preallocate the matrix storage by setting the parameters
1908    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1909    performance can be increased by more than a factor of 50.
1910 
1911    Collective on Mat
1912 
1913    Input Parameters:
1914 +  A - the matrix
1915 .  bs   - size of blockk
1916 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1917            submatrix  (same for all local rows)
1918 .  d_nnz - array containing the number of block nonzeros in the various block rows
1919            in the upper triangular and diagonal part of the in diagonal portion of the local
1920            (possibly different for each block row) or PETSC_NULL.  You must leave room
1921            for the diagonal entry even if it is zero.
1922 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1923            submatrix (same for all local rows).
1924 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1925            off-diagonal portion of the local submatrix (possibly different for
1926            each block row) or PETSC_NULL.
1927 
1928 
1929    Options Database Keys:
1930 .   -mat_no_unroll - uses code that does not unroll the loops in the
1931                      block calculations (much slower)
1932 .   -mat_block_size - size of the blocks to use
1933 
1934    Notes:
1935 
1936    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1937    than it must be used on all processors that share the object for that argument.
1938 
1939    If the *_nnz parameter is given then the *_nz parameter is ignored
1940 
1941    Storage Information:
1942    For a square global matrix we define each processor's diagonal portion
1943    to be its local rows and the corresponding columns (a square submatrix);
1944    each processor's off-diagonal portion encompasses the remainder of the
1945    local matrix (a rectangular submatrix).
1946 
1947    The user can specify preallocated storage for the diagonal part of
1948    the local submatrix with either d_nz or d_nnz (not both).  Set
1949    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1950    memory allocation.  Likewise, specify preallocated storage for the
1951    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1952 
1953    You can call MatGetInfo() to get information on how effective the preallocation was;
1954    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1955    You can also run with the option -info and look for messages with the string
1956    malloc in them to see if additional memory allocation was needed.
1957 
1958    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1959    the figure below we depict these three local rows and all columns (0-11).
1960 
1961 .vb
1962            0 1 2 3 4 5 6 7 8 9 10 11
1963           -------------------
1964    row 3  |  o o o d d d o o o o o o
1965    row 4  |  o o o d d d o o o o o o
1966    row 5  |  o o o d d d o o o o o o
1967           -------------------
1968 .ve
1969 
1970    Thus, any entries in the d locations are stored in the d (diagonal)
1971    submatrix, and any entries in the o locations are stored in the
1972    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1973    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1974 
1975    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1976    plus the diagonal part of the d matrix,
1977    and o_nz should indicate the number of block nonzeros per row in the upper triangular
1978    part of the o matrix.
1979    In general, for PDE problems in which most nonzeros are near the diagonal,
1980    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1981    or you will get TERRIBLE performance; see the users' manual chapter on
1982    matrices.
1983 
1984    Level: intermediate
1985 
1986 .keywords: matrix, block, aij, compressed row, sparse, parallel
1987 
1988 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1989 @*/
1990 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1991 {
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatCreateMPISBAIJ"
2001 /*@C
2002    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2003    (block compressed row).  For good matrix assembly performance
2004    the user should preallocate the matrix storage by setting the parameters
2005    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2006    performance can be increased by more than a factor of 50.
2007 
2008    Collective on MPI_Comm
2009 
2010    Input Parameters:
2011 +  comm - MPI communicator
2012 .  bs   - size of blockk
2013 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2014            This value should be the same as the local size used in creating the
2015            y vector for the matrix-vector product y = Ax.
2016 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2017            This value should be the same as the local size used in creating the
2018            x vector for the matrix-vector product y = Ax.
2019 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2020 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2021 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2022            submatrix  (same for all local rows)
2023 .  d_nnz - array containing the number of block nonzeros in the various block rows
2024            in the upper triangular portion of the in diagonal portion of the local
2025            (possibly different for each block block row) or PETSC_NULL.
2026            You must leave room for the diagonal entry even if it is zero.
2027 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2028            submatrix (same for all local rows).
2029 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2030            off-diagonal portion of the local submatrix (possibly different for
2031            each block row) or PETSC_NULL.
2032 
2033    Output Parameter:
2034 .  A - the matrix
2035 
2036    Options Database Keys:
2037 .   -mat_no_unroll - uses code that does not unroll the loops in the
2038                      block calculations (much slower)
2039 .   -mat_block_size - size of the blocks to use
2040 .   -mat_mpi - use the parallel matrix data structures even on one processor
2041                (defaults to using SeqBAIJ format on one processor)
2042 
2043    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2044    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2045    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2046 
2047    Notes:
2048    The number of rows and columns must be divisible by blocksize.
2049    This matrix type does not support complex Hermitian operation.
2050 
2051    The user MUST specify either the local or global matrix dimensions
2052    (possibly both).
2053 
2054    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2055    than it must be used on all processors that share the object for that argument.
2056 
2057    If the *_nnz parameter is given then the *_nz parameter is ignored
2058 
2059    Storage Information:
2060    For a square global matrix we define each processor's diagonal portion
2061    to be its local rows and the corresponding columns (a square submatrix);
2062    each processor's off-diagonal portion encompasses the remainder of the
2063    local matrix (a rectangular submatrix).
2064 
2065    The user can specify preallocated storage for the diagonal part of
2066    the local submatrix with either d_nz or d_nnz (not both).  Set
2067    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2068    memory allocation.  Likewise, specify preallocated storage for the
2069    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2070 
2071    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2072    the figure below we depict these three local rows and all columns (0-11).
2073 
2074 .vb
2075            0 1 2 3 4 5 6 7 8 9 10 11
2076           -------------------
2077    row 3  |  o o o d d d o o o o o o
2078    row 4  |  o o o d d d o o o o o o
2079    row 5  |  o o o d d d o o o o o o
2080           -------------------
2081 .ve
2082 
2083    Thus, any entries in the d locations are stored in the d (diagonal)
2084    submatrix, and any entries in the o locations are stored in the
2085    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2086    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2087 
2088    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2089    plus the diagonal part of the d matrix,
2090    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2091    In general, for PDE problems in which most nonzeros are near the diagonal,
2092    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2093    or you will get TERRIBLE performance; see the users' manual chapter on
2094    matrices.
2095 
2096    Level: intermediate
2097 
2098 .keywords: matrix, block, aij, compressed row, sparse, parallel
2099 
2100 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2101 @*/
2102 
2103 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2104 {
2105   PetscErrorCode ierr;
2106   PetscMPIInt    size;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2110   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2111   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2112   if (size > 1) {
2113     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2114     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2115   } else {
2116     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2117     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 
2123 #undef __FUNCT__
2124 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2125 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2126 {
2127   Mat            mat;
2128   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2129   PetscErrorCode ierr;
2130   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2131   PetscScalar    *array;
2132 
2133   PetscFunctionBegin;
2134   *newmat       = 0;
2135   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2136   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2137   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2138   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2139   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2140   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2141 
2142   mat->factortype   = matin->factortype;
2143   mat->preallocated = PETSC_TRUE;
2144   mat->assembled    = PETSC_TRUE;
2145   mat->insertmode   = NOT_SET_VALUES;
2146 
2147   a = (Mat_MPISBAIJ*)mat->data;
2148   a->bs2   = oldmat->bs2;
2149   a->mbs   = oldmat->mbs;
2150   a->nbs   = oldmat->nbs;
2151   a->Mbs   = oldmat->Mbs;
2152   a->Nbs   = oldmat->Nbs;
2153 
2154 
2155   a->size         = oldmat->size;
2156   a->rank         = oldmat->rank;
2157   a->donotstash   = oldmat->donotstash;
2158   a->roworiented  = oldmat->roworiented;
2159   a->rowindices   = 0;
2160   a->rowvalues    = 0;
2161   a->getrowactive = PETSC_FALSE;
2162   a->barray       = 0;
2163   a->rstartbs    = oldmat->rstartbs;
2164   a->rendbs      = oldmat->rendbs;
2165   a->cstartbs    = oldmat->cstartbs;
2166   a->cendbs      = oldmat->cendbs;
2167 
2168   /* hash table stuff */
2169   a->ht           = 0;
2170   a->hd           = 0;
2171   a->ht_size      = 0;
2172   a->ht_flag      = oldmat->ht_flag;
2173   a->ht_fact      = oldmat->ht_fact;
2174   a->ht_total_ct  = 0;
2175   a->ht_insert_ct = 0;
2176 
2177   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2178   if (oldmat->colmap) {
2179 #if defined (PETSC_USE_CTABLE)
2180     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2181 #else
2182     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2183     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2184     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2185 #endif
2186   } else a->colmap = 0;
2187 
2188   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2189     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2190     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2191     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2192   } else a->garray = 0;
2193 
2194   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2195   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2197   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2198   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2199 
2200   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2201   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2202   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2203   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2204 
2205   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2206   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2207   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2208   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2209   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2210   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2211   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2212   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2213   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2214   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2215   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2216   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2217   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2218 
2219   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2220   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2221   a->sMvctx = oldmat->sMvctx;
2222   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2223 
2224   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2225   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2226   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2227   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2228   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2229   *newmat = mat;
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatLoad_MPISBAIJ"
2235 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2236 {
2237   PetscErrorCode ierr;
2238   PetscInt       i,nz,j,rstart,rend;
2239   PetscScalar    *vals,*buf;
2240   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2241   MPI_Status     status;
2242   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2243   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2244   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2245   PetscInt       bs=1,Mbs,mbs,extra_rows;
2246   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2247   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2248   int            fd;
2249 
2250   PetscFunctionBegin;
2251   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2252     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2253   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2254 
2255   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2256   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2257   if (!rank) {
2258     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2259     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2260     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2261     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2262   }
2263 
2264   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2265 
2266   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2267   M = header[1]; N = header[2];
2268 
2269   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2270   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2271   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2272 
2273   /* If global sizes are set, check if they are consistent with that given in the file */
2274   if (sizesset) {
2275     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2276   }
2277   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2278   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2279 
2280   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2281 
2282   /*
2283      This code adds extra rows to make sure the number of rows is
2284      divisible by the blocksize
2285   */
2286   Mbs        = M/bs;
2287   extra_rows = bs - M + bs*(Mbs);
2288   if (extra_rows == bs) extra_rows = 0;
2289   else                  Mbs++;
2290   if (extra_rows &&!rank) {
2291     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2292   }
2293 
2294   /* determine ownership of all rows */
2295   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2296     mbs        = Mbs/size + ((Mbs % size) > rank);
2297     m          = mbs*bs;
2298   } else { /* User Set */
2299     m          = newmat->rmap->n;
2300     mbs        = m/bs;
2301   }
2302   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2303   mmbs       = PetscMPIIntCast(mbs);
2304   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2305   rowners[0] = 0;
2306   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2307   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2308   rstart = rowners[rank];
2309   rend   = rowners[rank+1];
2310 
2311   /* distribute row lengths to all processors */
2312   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2313   if (!rank) {
2314     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2315     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2316     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2317     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2318     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2319     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2320     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2321   } else {
2322     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2323   }
2324 
2325   if (!rank) {   /* procs[0] */
2326     /* calculate the number of nonzeros on each processor */
2327     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2328     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2329     for (i=0; i<size; i++) {
2330       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2331         procsnz[i] += rowlengths[j];
2332       }
2333     }
2334     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2335 
2336     /* determine max buffer needed and allocate it */
2337     maxnz = 0;
2338     for (i=0; i<size; i++) {
2339       maxnz = PetscMax(maxnz,procsnz[i]);
2340     }
2341     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2342 
2343     /* read in my part of the matrix column indices  */
2344     nz     = procsnz[0];
2345     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2346     mycols = ibuf;
2347     if (size == 1)  nz -= extra_rows;
2348     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2349     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2350 
2351     /* read in every ones (except the last) and ship off */
2352     for (i=1; i<size-1; i++) {
2353       nz   = procsnz[i];
2354       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2355       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2356     }
2357     /* read in the stuff for the last proc */
2358     if (size != 1) {
2359       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2360       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2361       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2362       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2363     }
2364     ierr = PetscFree(cols);CHKERRQ(ierr);
2365   } else {  /* procs[i], i>0 */
2366     /* determine buffer space needed for message */
2367     nz = 0;
2368     for (i=0; i<m; i++) {
2369       nz += locrowlens[i];
2370     }
2371     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2372     mycols = ibuf;
2373     /* receive message of column indices*/
2374     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2375     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2376     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2377   }
2378 
2379   /* loop over local rows, determining number of off diagonal entries */
2380   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2381   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2382   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2383   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2384   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2385   rowcount = 0;
2386   nzcount  = 0;
2387   for (i=0; i<mbs; i++) {
2388     dcount  = 0;
2389     odcount = 0;
2390     for (j=0; j<bs; j++) {
2391       kmax = locrowlens[rowcount];
2392       for (k=0; k<kmax; k++) {
2393         tmp = mycols[nzcount++]/bs; /* block col. index */
2394         if (!mask[tmp]) {
2395           mask[tmp] = 1;
2396           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2397           else masked1[dcount++] = tmp; /* entry in diag portion */
2398         }
2399       }
2400       rowcount++;
2401     }
2402 
2403     dlens[i]  = dcount;  /* d_nzz[i] */
2404     odlens[i] = odcount; /* o_nzz[i] */
2405 
2406     /* zero out the mask elements we set */
2407     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2408     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2409   }
2410     if (!sizesset) {
2411     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2412   }
2413   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2414   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2415 
2416   if (!rank) {
2417     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2418     /* read in my part of the matrix numerical values  */
2419     nz = procsnz[0];
2420     vals = buf;
2421     mycols = ibuf;
2422     if (size == 1)  nz -= extra_rows;
2423     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2424     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2425 
2426     /* insert into matrix */
2427     jj      = rstart*bs;
2428     for (i=0; i<m; i++) {
2429       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2430       mycols += locrowlens[i];
2431       vals   += locrowlens[i];
2432       jj++;
2433     }
2434 
2435     /* read in other processors (except the last one) and ship out */
2436     for (i=1; i<size-1; i++) {
2437       nz   = procsnz[i];
2438       vals = buf;
2439       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2440       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2441     }
2442     /* the last proc */
2443     if (size != 1){
2444       nz   = procsnz[i] - extra_rows;
2445       vals = buf;
2446       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2447       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2448       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2449     }
2450     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2451 
2452   } else {
2453     /* receive numeric values */
2454     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2455 
2456     /* receive message of values*/
2457     vals   = buf;
2458     mycols = ibuf;
2459     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2460     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2461     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2462 
2463     /* insert into matrix */
2464     jj      = rstart*bs;
2465     for (i=0; i<m; i++) {
2466       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2467       mycols += locrowlens[i];
2468       vals   += locrowlens[i];
2469       jj++;
2470     }
2471   }
2472 
2473   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2474   ierr = PetscFree(buf);CHKERRQ(ierr);
2475   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2476   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2477   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2478   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2479   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2480   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2486 /*XXXXX@
2487    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2488 
2489    Input Parameters:
2490 .  mat  - the matrix
2491 .  fact - factor
2492 
2493    Not Collective on Mat, each process can have a different hash factor
2494 
2495    Level: advanced
2496 
2497   Notes:
2498    This can also be set by the command line option: -mat_use_hash_table fact
2499 
2500 .keywords: matrix, hashtable, factor, HT
2501 
2502 .seealso: MatSetOption()
2503 @XXXXX*/
2504 
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2508 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2509 {
2510   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2511   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2512   PetscReal      atmp;
2513   PetscReal      *work,*svalues,*rvalues;
2514   PetscErrorCode ierr;
2515   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2516   PetscMPIInt    rank,size;
2517   PetscInt       *rowners_bs,dest,count,source;
2518   PetscScalar    *va;
2519   MatScalar      *ba;
2520   MPI_Status     stat;
2521 
2522   PetscFunctionBegin;
2523   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2524   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2525   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2526 
2527   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2528   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2529 
2530   bs   = A->rmap->bs;
2531   mbs  = a->mbs;
2532   Mbs  = a->Mbs;
2533   ba   = b->a;
2534   bi   = b->i;
2535   bj   = b->j;
2536 
2537   /* find ownerships */
2538   rowners_bs = A->rmap->range;
2539 
2540   /* each proc creates an array to be distributed */
2541   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2542   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2543 
2544   /* row_max for B */
2545   if (rank != size-1){
2546     for (i=0; i<mbs; i++) {
2547       ncols = bi[1] - bi[0]; bi++;
2548       brow  = bs*i;
2549       for (j=0; j<ncols; j++){
2550         bcol = bs*(*bj);
2551         for (kcol=0; kcol<bs; kcol++){
2552           col = bcol + kcol;                 /* local col index */
2553           col += rowners_bs[rank+1];      /* global col index */
2554           for (krow=0; krow<bs; krow++){
2555             atmp = PetscAbsScalar(*ba); ba++;
2556             row = brow + krow;    /* local row index */
2557             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2558             if (work[col] < atmp) work[col] = atmp;
2559           }
2560         }
2561         bj++;
2562       }
2563     }
2564 
2565     /* send values to its owners */
2566     for (dest=rank+1; dest<size; dest++){
2567       svalues = work + rowners_bs[dest];
2568       count   = rowners_bs[dest+1]-rowners_bs[dest];
2569       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2570     }
2571   }
2572 
2573   /* receive values */
2574   if (rank){
2575     rvalues = work;
2576     count   = rowners_bs[rank+1]-rowners_bs[rank];
2577     for (source=0; source<rank; source++){
2578       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2579       /* process values */
2580       for (i=0; i<count; i++){
2581         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2582       }
2583     }
2584   }
2585 
2586   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2587   ierr = PetscFree(work);CHKERRQ(ierr);
2588   PetscFunctionReturn(0);
2589 }
2590 
2591 #undef __FUNCT__
2592 #define __FUNCT__ "MatSOR_MPISBAIJ"
2593 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2594 {
2595   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2596   PetscErrorCode    ierr;
2597   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2598   PetscScalar       *x,*ptr,*from;
2599   Vec               bb1;
2600   const PetscScalar *b;
2601 
2602   PetscFunctionBegin;
2603   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2604   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2605 
2606   if (flag == SOR_APPLY_UPPER) {
2607     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2608     PetscFunctionReturn(0);
2609   }
2610 
2611   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2612     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2613       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2614       its--;
2615     }
2616 
2617     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2618     while (its--){
2619 
2620       /* lower triangular part: slvec0b = - B^T*xx */
2621       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2622 
2623       /* copy xx into slvec0a */
2624       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2625       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2626       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2627       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2628 
2629       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2630 
2631       /* copy bb into slvec1a */
2632       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2633       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2634       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2635       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2636 
2637       /* set slvec1b = 0 */
2638       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2639 
2640       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2641       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2642       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2643       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2644 
2645       /* upper triangular part: bb1 = bb1 - B*x */
2646       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2647 
2648       /* local diagonal sweep */
2649       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2650     }
2651     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2652   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2653     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2654   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2655     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2656   } else if (flag & SOR_EISENSTAT) {
2657     Vec               xx1;
2658     PetscBool         hasop;
2659     const PetscScalar *diag;
2660     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2661     PetscInt          i,n;
2662 
2663     if (!mat->xx1) {
2664       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2665       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2666     }
2667     xx1 = mat->xx1;
2668     bb1 = mat->bb1;
2669 
2670     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2671 
2672     if (!mat->diag) {
2673       /* this is wrong for same matrix with new nonzero values */
2674       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2675       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2676     }
2677     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2678 
2679     if (hasop) {
2680       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2681       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2682     } else {
2683       /*
2684           These two lines are replaced by code that may be a bit faster for a good compiler
2685       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2686       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2687       */
2688       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2689       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2690       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2691       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2692       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2693       if (omega == 1.0) {
2694 	for (i=0; i<n; i++) {
2695 	  sl[i] = b[i] - diag[i]*x[i];
2696 	}
2697         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2698       } else {
2699 	for (i=0; i<n; i++) {
2700 	  sl[i] = b[i] + scale*diag[i]*x[i];
2701 	}
2702         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2703       }
2704       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2705       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2706       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2707       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2708     }
2709 
2710     /* multiply off-diagonal portion of matrix */
2711     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2712     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2713     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2714     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2715     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2716     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2717     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2718     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2721 
2722     /* local sweep */
2723     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2724     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2725   } else {
2726     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2727   }
2728   PetscFunctionReturn(0);
2729 }
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2733 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2734 {
2735   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2736   PetscErrorCode ierr;
2737   Vec            lvec1,bb1;
2738 
2739   PetscFunctionBegin;
2740   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2741   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2742 
2743   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2744     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2745       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2746       its--;
2747     }
2748 
2749     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2750     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2751     while (its--){
2752       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2753 
2754       /* lower diagonal part: bb1 = bb - B^T*xx */
2755       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2756       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2757 
2758       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2759       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2760       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2761 
2762       /* upper diagonal part: bb1 = bb1 - B*x */
2763       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2764       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2765 
2766       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2767 
2768       /* diagonal sweep */
2769       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2770     }
2771     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2772     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2773   } else {
2774     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2775   }
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2781 /*@
2782      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2783          CSR format the local rows.
2784 
2785    Collective on MPI_Comm
2786 
2787    Input Parameters:
2788 +  comm - MPI communicator
2789 .  bs - the block size, only a block size of 1 is supported
2790 .  m - number of local rows (Cannot be PETSC_DECIDE)
2791 .  n - This value should be the same as the local size used in creating the
2792        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2793        calculated if N is given) For square matrices n is almost always m.
2794 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2795 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2796 .   i - row indices
2797 .   j - column indices
2798 -   a - matrix values
2799 
2800    Output Parameter:
2801 .   mat - the matrix
2802 
2803    Level: intermediate
2804 
2805    Notes:
2806        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2807      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2808      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2809 
2810        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2811 
2812 .keywords: matrix, aij, compressed row, sparse, parallel
2813 
2814 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2815           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2816 @*/
2817 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2818 {
2819   PetscErrorCode ierr;
2820 
2821 
2822  PetscFunctionBegin;
2823   if (i[0]) {
2824     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2825   }
2826   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2827   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2828   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2829   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2830   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2831   PetscFunctionReturn(0);
2832 }
2833 
2834 
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2837 /*@C
2838    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2839    (the default parallel PETSc format).
2840 
2841    Collective on MPI_Comm
2842 
2843    Input Parameters:
2844 +  A - the matrix
2845 .  bs - the block size
2846 .  i - the indices into j for the start of each local row (starts with zero)
2847 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2848 -  v - optional values in the matrix
2849 
2850    Level: developer
2851 
2852 .keywords: matrix, aij, compressed row, sparse, parallel
2853 
2854 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2855 @*/
2856 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2857 {
2858   PetscErrorCode ierr;
2859 
2860   PetscFunctionBegin;
2861   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2862   PetscFunctionReturn(0);
2863 }
2864 
2865 
2866