xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b9c8ac66c7d4d8a0c8dc29d9c030d16ade02261a)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define CHUNKSIZE  10
55 
56 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
57 { \
58  \
59     brow = row/bs;  \
60     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
61     rmax = aimax[brow]; nrow = ailen[brow]; \
62       bcol = col/bs; \
63       ridx = row % bs; cidx = col % bs; \
64       low = 0; high = nrow; \
65       while (high-low > 3) { \
66         t = (low+high)/2; \
67         if (rp[t] > bcol) high = t; \
68         else              low  = t; \
69       } \
70       for (_i=low; _i<high; _i++) { \
71         if (rp[_i] > bcol) break; \
72         if (rp[_i] == bcol) { \
73           bap  = ap +  bs2*_i + bs*cidx + ridx; \
74           if (addv == ADD_VALUES) *bap += value;  \
75           else                    *bap  = value;  \
76           goto a_noinsert; \
77         } \
78       } \
79       if (a->nonew == 1) goto a_noinsert; \
80       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
81       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
82       N = nrow++ - 1;  \
83       /* shift up all the later entries in this row */ \
84       for (ii=N; ii>=_i; ii--) { \
85         rp[ii+1] = rp[ii]; \
86         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
87       } \
88       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
89       rp[_i]                      = bcol;  \
90       ap[bs2*_i + bs*cidx + ridx] = value;  \
91       a_noinsert:; \
92     ailen[brow] = nrow; \
93 }
94 
95 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
96 { \
97     brow = row/bs;  \
98     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
99     rmax = bimax[brow]; nrow = bilen[brow]; \
100       bcol = col/bs; \
101       ridx = row % bs; cidx = col % bs; \
102       low = 0; high = nrow; \
103       while (high-low > 3) { \
104         t = (low+high)/2; \
105         if (rp[t] > bcol) high = t; \
106         else              low  = t; \
107       } \
108       for (_i=low; _i<high; _i++) { \
109         if (rp[_i] > bcol) break; \
110         if (rp[_i] == bcol) { \
111           bap  = ap +  bs2*_i + bs*cidx + ridx; \
112           if (addv == ADD_VALUES) *bap += value;  \
113           else                    *bap  = value;  \
114           goto b_noinsert; \
115         } \
116       } \
117       if (b->nonew == 1) goto b_noinsert; \
118       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
119       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
120       N = nrow++ - 1;  \
121       /* shift up all the later entries in this row */ \
122       for (ii=N; ii>=_i; ii--) { \
123         rp[ii+1] = rp[ii]; \
124         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
125       } \
126       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
127       rp[_i]                      = bcol;  \
128       ap[bs2*_i + bs*cidx + ridx] = value;  \
129       b_noinsert:; \
130     bilen[brow] = nrow; \
131 }
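/*
   Both macros above locate block column bcol within the sorted row segment
   rp[0..nrow-1] by a truncated binary search (narrow the window to at most 3
   entries, then scan linearly) and, if absent, insert it by shifting the later
   entries up one slot. A minimal standalone sketch of the same search pattern
   (illustrative only; find_block_pos is hypothetical and not part of PETSc):

     static PetscInt find_block_pos(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {        // binary search down to a small window
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {    // linear scan of the remaining window
         if (rp[i] >= bcol) break;   // hit bcol, or passed the insertion point
       }
       return i;                     // index of bcol, else where it belongs
     }
*/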
132 
133 /* Only add/insert a(i,j) with i<=j (block indices).
134    Any a(i,j) with i>j input by the user is ignored.
135 */
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetValues_MPISBAIJ"
138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
139 {
140   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
141   MatScalar      value;
142   PetscTruth     roworiented = baij->roworiented;
143   PetscErrorCode ierr;
144   PetscInt       i,j,row,col;
145   PetscInt       rstart_orig=mat->rmap->rstart;
146   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
147   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
148 
149   /* Some Variables required in the macro */
150   Mat            A = baij->A;
151   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
152   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
153   MatScalar      *aa=a->a;
154 
155   Mat            B = baij->B;
156   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
157   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
158   MatScalar     *ba=b->a;
159 
160   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
161   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
162   MatScalar     *ap,*bap;
163 
164   /* for stash */
165   PetscInt      n_loc, *in_loc = PETSC_NULL;
166   MatScalar     *v_loc = PETSC_NULL;
167 
168   PetscFunctionBegin;
169 
170   if (!baij->donotstash){
171     if (n > baij->n_loc) {
172       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
176       baij->n_loc = n;
177     }
178     in_loc = baij->in_loc;
179     v_loc  = baij->v_loc;
180   }
181 
182   for (i=0; i<m; i++) {
183     if (im[i] < 0) continue;
184 #if defined(PETSC_USE_DEBUG)
185     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
186 #endif
187     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
188       row = im[i] - rstart_orig;              /* local row index */
189       for (j=0; j<n; j++) {
190         if (im[i]/bs > in[j]/bs){
191           if (a->ignore_ltriangular){
192             continue;    /* ignore lower triangular blocks */
193           } else {
194             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
195           }
196         }
197         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
198           col = in[j] - cstart_orig;          /* local col index */
199           brow = row/bs; bcol = col/bs;
200           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
201           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
202           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
203           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
204         } else if (in[j] < 0) continue;
205 #if defined(PETSC_USE_DEBUG)
206         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
207 #endif
208         else {  /* off-diag entry (B) */
209           if (mat->was_assembled) {
210             if (!baij->colmap) {
211               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
212             }
213 #if defined (PETSC_USE_CTABLE)
214             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
215             col  = col - 1;
216 #else
217             col = baij->colmap[in[j]/bs] - 1;
218 #endif
219             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
220               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
221               col =  in[j];
222               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
223               B = baij->B;
224               b = (Mat_SeqBAIJ*)(B)->data;
225               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226               ba=b->a;
227             } else col += in[j]%bs;
228           } else col = in[j];
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (!baij->donotstash) {
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
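/*
   Usage sketch (illustrative; assumes mat is a created and preallocated
   MATMPISBAIJ matrix with block size 1, accessed through the public
   MatSetValues() interface):

     PetscInt    i = 0,j = 2;
     PetscScalar v = 3.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); // kept: (0,2) is upper triangular
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); // silently dropped: (2,0) is lower triangular
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/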
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscTruth      roworiented = baij->roworiented;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++  = *value++;
300             }
301           }
302           barray -=bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend){
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         }
309         else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined (PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined (PETSC_USE_CTABLE)
330 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col =  in[j];
338             }
339           }
340           else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
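/*
   Usage sketch for the blocked interface (illustrative; assumes block size 2
   and global block indices). With row-oriented storage, the default, the
   bs*bs block is supplied row by row:

     PetscInt    brow = 0,bcol = 1;            // block row/column (brow <= bcol)
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};  // one 2x2 block, row-major
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,vals,INSERT_VALUES);CHKERRQ(ierr);
*/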
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend){
376           col = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined (PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data --;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else {
396       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
397     }
398   }
399  PetscFunctionReturn(0);
400 }
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "MatNorm_MPISBAIJ"
404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
405 {
406   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
407   PetscErrorCode ierr;
408   PetscReal      sum[2],*lnorm2;
409 
410   PetscFunctionBegin;
411   if (baij->size == 1) {
412     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
413   } else {
414     if (type == NORM_FROBENIUS) {
415       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
416       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
418       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
420       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
421       *norm = sqrt(sum[0] + 2*sum[1]);
422       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
423     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
424       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
425       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
426       PetscReal    *rsum,*rsum2,vabs;
427       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
428       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
429       MatScalar    *v;
430 
431       ierr  = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
432       rsum2 = rsum + mat->cmap->N;
433       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++){
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++){
442             for (row=0; row<bs; row++){
443               vabs = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* off-diagonal block: by symmetry it also contributes to the row sum */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++){
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++){
459             for (row=0; row<bs; row++){
460               vabs = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree(rsum);CHKERRQ(ierr);
473     } else {
474       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
475     }
476   }
477   PetscFunctionReturn(0);
478 }
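/*
   Note on the Frobenius branch above: each process stores the diagonal block
   A and the strictly off-diagonal block B of the symmetric matrix M, while
   the mirrored entries of B^T are not stored. Hence

     ||M||_F^2 = sum over processes of ( ||A||_F^2 + 2*||B||_F^2 )

   which is why the reduced sums are combined as sqrt(sum[0] + 2*sum[1]).
*/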
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
482 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
483 {
484   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
485   PetscErrorCode ierr;
486   PetscInt       nstash,reallocs;
487   InsertMode     addv;
488 
489   PetscFunctionBegin;
490   if (baij->donotstash) {
491     PetscFunctionReturn(0);
492   }
493 
494   /* make sure all processors are either in INSERTMODE or ADDMODE */
495   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
496   if (addv == (ADD_VALUES|INSERT_VALUES)) {
497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
498   }
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscTruth     other_disassembled;
520   PetscMPIInt    n;
521   PetscTruth     r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527 
528   if (!baij->donotstash) {
529     while (1) {
530       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
531       if (!flg) break;
532 
533       for (i=0; i<n;) {
534         /* Now identify the consecutive vals belonging to the same row */
535         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
536         if (j < n) ncols = j-i;
537         else       ncols = n-i;
538         /* Now assemble all these values with a single function call */
539         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
540         i = j;
541       }
542     }
543     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
544     /* Now process the block-stash. Since the values are stashed column-oriented,
545        temporarily set the roworiented flags to PETSC_FALSE (column-oriented) and
546        restore the original flags after the MatSetValuesBlocked() calls */
547     r1 = baij->roworiented;
548     r2 = a->roworiented;
549     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
550     baij->roworiented = PETSC_FALSE;
551     a->roworiented    = PETSC_FALSE;
552     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
553     while (1) {
554       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
555       if (!flg) break;
556 
557       for (i=0; i<n;) {
558         /* Now identify the consecutive vals belonging to the same row */
559         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
570   }
571 
572   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled; if so we must
576      also disassemble ourselves so that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587 
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
590   }
591   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
596   baij->rowvalues = 0;
597 
598   PetscFunctionReturn(0);
599 }
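/*
   The stash processing above is what makes the standard assembly idiom work
   when entries are generated on a process that does not own them (illustrative
   sketch; the indices are hypothetical and assumed to lie in another process's
   row range):

     if (!rank) {                      // rank 0 sets an entry owned by rank 1
       PetscInt    i = 10,j = 12;
       PetscScalar v = 1.0;
       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); // cached in mat->stash
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    // starts the stash scatter
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);      // receives and inserts stashed values here
*/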
600 
601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
602 #undef __FUNCT__
603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
605 {
606   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
607   PetscErrorCode    ierr;
608   PetscInt          bs = mat->rmap->bs;
609   PetscMPIInt       size = baij->size,rank = baij->rank;
610   PetscTruth        iascii,isdraw;
611   PetscViewer       sviewer;
612   PetscViewerFormat format;
613 
614   PetscFunctionBegin;
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
617   if (iascii) {
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
620       MatInfo info;
621       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
622       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
623       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
624               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
625               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
632       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
638       PetscFunctionReturn(0);
639     }
640   }
641 
642   if (isdraw) {
643     PetscDraw  draw;
644     PetscTruth isnull;
645     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
646     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
647   }
648 
649   if (size == 1) {
650     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
651     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
652   } else {
653     /* assemble the entire matrix onto first processor. */
654     Mat          A;
655     Mat_SeqSBAIJ *Aloc;
656     Mat_SeqBAIJ  *Bloc;
657     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
658     MatScalar    *a;
659 
660     /* Should this be the same type as mat? */
661     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
662     if (!rank) {
663       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
664     } else {
665       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
666     }
667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
668     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to call to draw the matrix since the graphics waits are
707        synchronized across all processors that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     ierr = MatDestroy(A);CHKERRQ(ierr);
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatView_MPISBAIJ"
722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
723 {
724   PetscErrorCode ierr;
725   PetscTruth     iascii,isdraw,issocket,isbinary;
726 
727   PetscFunctionBegin;
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
732   if (iascii || isdraw || issocket || isbinary) {
733     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPISBAIJ"
742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
743 {
744   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
757 #else
758   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
761   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
762   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
763   if (baij->slvec0) {
764     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
765     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
766   }
767   if (baij->slvec1) {
768     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
770     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
771   }
772   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
773   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
774   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
775   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
776   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
777   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
778   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
779 #if defined(PETSC_USE_MAT_SINGLE)
780   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
781 #endif
782   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
785   ierr = PetscFree(baij);CHKERRQ(ierr);
786 
787   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_MPISBAIJ"
797 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
798 {
799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
800   PetscErrorCode ierr;
801   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
802   PetscScalar    *x,*from;
803 
804   PetscFunctionBegin;
805   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
806   if (nt != A->cmap->n) {
807     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
808   }
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
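/*
   Algorithm note: with only the upper triangle stored, y = M x is assembled
   from three pieces,

     y = A x_local + B x_ghost + B^T x_local

   The code above computes A x_local into slvec1a and B^T x_local into
   slvec0b; a single forward ADD_VALUES scatter of slvec0 = [x_local ; B^T x_local]
   then both delivers the B^T contributions to their owning processes (into
   slvec1a) and gathers the ghost values of x (into slvec1b); the final
   multadd forms yy = B*slvec1b + slvec1a.
*/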
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
834 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt;
839 
840   PetscFunctionBegin;
841   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
842   if (nt != A->cmap->n) {
843     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844   }
845   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
846   if (nt != A->rmap->n) {
847     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
848   }
849 
850   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
851   /* do diagonal part */
852   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
853   /* do superdiagonal part */
854   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
856   /* do subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
858   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
859   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
860 
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
866 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
867 {
868   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
869   PetscErrorCode ierr;
870   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
871   PetscScalar    *x,*from,zero=0.0;
872 
873   PetscFunctionBegin;
874   /*
875   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
876   PetscSynchronizedFlush(((PetscObject)A)->comm);
877   */
878   /* diagonal part */
879   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
880   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
881 
882   /* subdiagonal part */
883   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
884 
885   /* copy x into the vec slvec0 */
886   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
887   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
889   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
890 
891   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
893   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894 
895   /* superdiagonal part */
896   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
897 
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
903 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
904 {
905   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
906   PetscErrorCode ierr;
907 
908   PetscFunctionBegin;
909   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   /* do diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
912   /* do superdiagonal part */
913   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
915 
916   /* do subdiagonal part */
917   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
918   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
919   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
920 
921   PetscFunctionReturn(0);
922 }
923 
924 /*
925   This only works correctly for square matrices where the subblock A->A is the
926    diagonal block
927 */
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
930 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
931 {
932   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
937   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatScale_MPISBAIJ"
943 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
944 {
945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
950   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatGetRow_MPISBAIJ"
956 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
957 {
958   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
959   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
960   PetscErrorCode ierr;
961   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
962   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
963   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
964 
965   PetscFunctionBegin;
966   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
967   mat->getrowactive = PETSC_TRUE;
968 
969   if (!mat->rowvalues && (idx || v)) {
970     /*
971         allocate enough space to hold information from the longest row.
972     */
973     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
974     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
975     PetscInt     max = 1,mbs = mat->mbs,tmp;
976     for (i=0; i<mbs; i++) {
977       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
978       if (max < tmp) { max = tmp; }
979     }
980     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
981     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
982   }
983 
984   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
985   lrow = row - brstart;  /* local row index */
986 
987   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
988   if (!v)   {pvA = 0; pvB = 0;}
989   if (!idx) {pcA = 0; if (!v) pcB = 0;}
990   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
991   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
992   nztot = nzA + nzB;
993 
994   cmap  = mat->garray;
995   if (v  || idx) {
996     if (nztot) {
997       /* Sort by increasing column numbers, assuming A and B already sorted */
998       PetscInt imark = -1;
999       if (v) {
1000         *v = v_p = mat->rowvalues;
1001         for (i=0; i<nzB; i++) {
1002           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1003           else break;
1004         }
1005         imark = i;
1006         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1007         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1008       }
1009       if (idx) {
1010         *idx = idx_p = mat->rowindices;
1011         if (imark > -1) {
1012           for (i=0; i<imark; i++) {
1013             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1014           }
1015         } else {
1016           for (i=0; i<nzB; i++) {
1017             if (cmap[cworkB[i]/bs] < cstart)
1018               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1019             else break;
1020           }
1021           imark = i;
1022         }
1023         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1024         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1025       }
1026     } else {
1027       if (idx) *idx = 0;
1028       if (v)   *v   = 0;
1029     }
1030   }
1031   *nz = nztot;
1032   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1033   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1039 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1040 {
1041   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1042 
1043   PetscFunctionBegin;
1044   if (!baij->getrowactive) {
1045     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1046   }
1047   baij->getrowactive = PETSC_FALSE;
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1053 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1057 
1058   PetscFunctionBegin;
1059   aA->getrow_utriangular = PETSC_TRUE;
1060   PetscFunctionReturn(0);
1061 }
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1064 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1065 {
1066   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1067   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1068 
1069   PetscFunctionBegin;
1070   aA->getrow_utriangular = PETSC_FALSE;
1071   PetscFunctionReturn(0);
1072 }
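/*
   Usage sketch: for SBAIJ formats a row may only be examined after signalling
   that the upper triangular row storage will be accessed (illustrative;
   assumes row 0 is locally owned and the const-qualified MatGetRow()
   interface of this release):

     PetscInt          nz,row = 0;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     // ... use the nz stored (upper triangular) entries of the row ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/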
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1076 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1077 {
1078   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1083   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1089 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1090 {
1091   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1096   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1102 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1109   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1115 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1118   Mat            A = a->A,B = a->B;
1119   PetscErrorCode ierr;
1120   PetscReal      isend[5],irecv[5];
1121 
1122   PetscFunctionBegin;
1123   info->block_size     = (PetscReal)matin->rmap->bs;
1124   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1125   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1126   isend[3] = info->memory;  isend[4] = info->mallocs;
1127   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1128   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1129   isend[3] += info->memory;  isend[4] += info->mallocs;
1130   if (flag == MAT_LOCAL) {
1131     info->nz_used      = isend[0];
1132     info->nz_allocated = isend[1];
1133     info->nz_unneeded  = isend[2];
1134     info->memory       = isend[3];
1135     info->mallocs      = isend[4];
1136   } else if (flag == MAT_GLOBAL_MAX) {
1137     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1138     info->nz_used      = irecv[0];
1139     info->nz_allocated = irecv[1];
1140     info->nz_unneeded  = irecv[2];
1141     info->memory       = irecv[3];
1142     info->mallocs      = irecv[4];
1143   } else if (flag == MAT_GLOBAL_SUM) {
1144     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   } else {
1151     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1152   }
1153   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1154   info->fill_ratio_needed = 0;
1155   info->factor_mallocs    = 0;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1161 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1162 {
1163   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1164   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   switch (op) {
1169   case MAT_NEW_NONZERO_LOCATIONS:
1170   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1171   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1172   case MAT_KEEP_NONZERO_PATTERN:
1173   case MAT_NEW_NONZERO_LOCATION_ERR:
1174     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1175     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1176     break;
1177   case MAT_ROW_ORIENTED:
1178     a->roworiented = flg;
1179     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1180     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1181     break;
1182   case MAT_NEW_DIAGONALS:
1183     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1184     break;
1185   case MAT_IGNORE_OFF_PROC_ENTRIES:
1186     a->donotstash = flg;
1187     break;
1188   case MAT_USE_HASH_TABLE:
1189     a->ht_flag = flg;
1190     break;
1191   case MAT_HERMITIAN:
1192     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1193   case MAT_SYMMETRIC:
1194   case MAT_STRUCTURALLY_SYMMETRIC:
1195   case MAT_SYMMETRY_ETERNAL:
1196     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1197     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_LOWER_TRIANGULAR:
1200     aA->ignore_ltriangular = flg;
1201     break;
1202   case MAT_ERROR_LOWER_TRIANGULAR:
1203     aA->ignore_ltriangular = flg;
1204     break;
1205   case MAT_GETROW_UPPERTRIANGULAR:
1206     aA->getrow_utriangular = flg;
1207     break;
1208   default:
1209     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1216 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1217 {
1218   PetscErrorCode ierr;
1219   PetscFunctionBegin;
1220   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1221     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1222   }
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1228 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1229 {
1230   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1231   Mat            a=baij->A, b=baij->B;
1232   PetscErrorCode ierr;
1233   PetscInt       nv,m,n;
1234   PetscTruth     flg;
1235 
1236   PetscFunctionBegin;
1237   if (ll != rr){
1238     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1239     if (!flg)
1240       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1241   }
1242   if (!ll) PetscFunctionReturn(0);
1243 
1244   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1245   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1246 
1247   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1248   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1249 
1250   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251 
1252   /* left diagonalscale the off-diagonal part */
1253   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1254 
1255   /* scale the diagonal part */
1256   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1257 
1258   /* right diagonalscale the off-diagonal part */
1259   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
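/*
   Usage sketch: because only half the matrix is stored, diagonal scaling must
   be symmetric, i.e. D M D with the same vector on both sides (illustrative):

     Vec d;
     ierr = MatGetVecs(mat,&d,PETSC_NULL);CHKERRQ(ierr); // vector matching mat's layout
     ierr = VecSet(d,2.0);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);     // ll == rr is required above
     ierr = VecDestroy(d);CHKERRQ(ierr);
*/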
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1266 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1267 {
1268   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatEqual_MPISBAIJ"
1280 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1281 {
1282   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1283   Mat            a,b,c,d;
1284   PetscTruth     flg;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   a = matA->A; b = matA->B;
1289   c = matB->A; d = matB->B;
1290 
1291   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1292   if (flg) {
1293     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1294   }
1295   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatCopy_MPISBAIJ"
1301 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1302 {
1303   PetscErrorCode ierr;
1304   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1305   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1306 
1307   PetscFunctionBegin;
1308   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1309   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1310     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1311     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1312     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1313   } else {
1314     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1315     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1322 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1323 {
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #include "petscblaslapack.h"
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1334 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1335 {
1336   PetscErrorCode ierr;
1337   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1338   PetscBLASInt   bnz,one=1;
1339   Mat_SeqSBAIJ   *xa,*ya;
1340   Mat_SeqBAIJ    *xb,*yb;
1341 
1342   PetscFunctionBegin;
1343   if (str == SAME_NONZERO_PATTERN) {
1344     PetscScalar alpha = a;
1345     xa = (Mat_SeqSBAIJ *)xx->A->data;
1346     ya = (Mat_SeqSBAIJ *)yy->A->data;
1347     bnz = PetscBLASIntCast(xa->nz);
1348     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1349     xb = (Mat_SeqBAIJ *)xx->B->data;
1350     yb = (Mat_SeqBAIJ *)yy->B->data;
1351     bnz = PetscBLASIntCast(xb->nz);
1352     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1353   } else {
1354     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1355     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1357   }
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1363 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1364 {
1365   PetscErrorCode ierr;
1366   PetscInt       i;
1367   PetscTruth     flg;
1368 
1369   PetscFunctionBegin;
1370   for (i=0; i<n; i++) {
1371     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1372     if (!flg) {
1373       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1374     }
1375   }
1376   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 
1381 /* -------------------------------------------------------------------*/
1382 static struct _MatOps MatOps_Values = {
1383        MatSetValues_MPISBAIJ,
1384        MatGetRow_MPISBAIJ,
1385        MatRestoreRow_MPISBAIJ,
1386        MatMult_MPISBAIJ,
1387 /* 4*/ MatMultAdd_MPISBAIJ,
1388        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose versions */
1389        MatMultAdd_MPISBAIJ,
1390        0,
1391        0,
1392        0,
1393 /*10*/ 0,
1394        0,
1395        0,
1396        MatRelax_MPISBAIJ,
1397        MatTranspose_MPISBAIJ,
1398 /*15*/ MatGetInfo_MPISBAIJ,
1399        MatEqual_MPISBAIJ,
1400        MatGetDiagonal_MPISBAIJ,
1401        MatDiagonalScale_MPISBAIJ,
1402        MatNorm_MPISBAIJ,
1403 /*20*/ MatAssemblyBegin_MPISBAIJ,
1404        MatAssemblyEnd_MPISBAIJ,
1405        MatSetOption_MPISBAIJ,
1406        MatZeroEntries_MPISBAIJ,
1407 /*24*/ 0,
1408        0,
1409        0,
1410        0,
1411        0,
1412 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1413        0,
1414        0,
1415        0,
1416        0,
1417 /*34*/ MatDuplicate_MPISBAIJ,
1418        0,
1419        0,
1420        0,
1421        0,
1422 /*39*/ MatAXPY_MPISBAIJ,
1423        MatGetSubMatrices_MPISBAIJ,
1424        MatIncreaseOverlap_MPISBAIJ,
1425        MatGetValues_MPISBAIJ,
1426        MatCopy_MPISBAIJ,
1427 /*44*/ 0,
1428        MatScale_MPISBAIJ,
1429        0,
1430        0,
1431        0,
1432 /*49*/ 0,
1433        0,
1434        0,
1435        0,
1436        0,
1437 /*54*/ 0,
1438        0,
1439        MatSetUnfactored_MPISBAIJ,
1440        0,
1441        MatSetValuesBlocked_MPISBAIJ,
1442 /*59*/ 0,
1443        0,
1444        0,
1445        0,
1446        0,
1447 /*64*/ 0,
1448        0,
1449        0,
1450        0,
1451        0,
1452 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1453        0,
1454        0,
1455        0,
1456        0,
1457 /*74*/ 0,
1458        0,
1459        0,
1460        0,
1461        0,
1462 /*79*/ 0,
1463        0,
1464        0,
1465        0,
1466        MatLoad_MPISBAIJ,
1467 /*84*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*89*/ 0,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*94*/ 0,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*99*/ 0,
1483        0,
1484        0,
1485        0,
1486        0,
1487 /*104*/0,
1488        MatRealPart_MPISBAIJ,
1489        MatImaginaryPart_MPISBAIJ,
1490        MatGetRowUpperTriangular_MPISBAIJ,
1491        MatRestoreRowUpperTriangular_MPISBAIJ
1492 };
1493 
1494 
1495 EXTERN_C_BEGIN
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1498 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1499 {
1500   PetscFunctionBegin;
1501   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1502   *iscopy = PETSC_FALSE;
1503   PetscFunctionReturn(0);
1504 }
1505 EXTERN_C_END
1506 
1507 EXTERN_C_BEGIN
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1510 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1511 {
1512   Mat_MPISBAIJ   *b;
1513   PetscErrorCode ierr;
1514   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1515 
1516   PetscFunctionBegin;
1517   if (bs < 0){
1518     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1519     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1520     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1521     bs   = PetscAbs(bs);
1522   }
1523   if ((d_nnz || o_nnz) && newbs != bs) {
1524     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1525   }
1526   bs = newbs;
1527 
1528   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1529   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1530   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1531   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1532 
1533   B->rmap->bs = B->cmap->bs = bs;
1534   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1535   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1536 
1537   if (d_nnz) {
1538     for (i=0; i<B->rmap->n/bs; i++) {
1539       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1540     }
1541   }
1542   if (o_nnz) {
1543     for (i=0; i<B->rmap->n/bs; i++) {
1544       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1545     }
1546   }
1547 
1548   b   = (Mat_MPISBAIJ*)B->data;
1549   mbs = B->rmap->n/bs;
1550   Mbs = B->rmap->N/bs;
1551   if (mbs*bs != B->rmap->n) {
1552     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1553   }
1554 
1555   B->rmap->bs  = bs;
1556   b->bs2 = bs*bs;
1557   b->mbs = mbs;
1558   b->nbs = mbs;
1559   b->Mbs = Mbs;
1560   b->Nbs = Mbs;
1561 
1562   for (i=0; i<=b->size; i++) {
1563     b->rangebs[i] = B->rmap->range[i]/bs;
1564   }
1565   b->rstartbs = B->rmap->rstart/bs;
1566   b->rendbs   = B->rmap->rend/bs;
1567 
1568   b->cstartbs = B->cmap->rstart/bs;
1569   b->cendbs   = B->cmap->rend/bs;
1570 
1571   if (!B->preallocated) {
1572     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1573     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1574     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1575     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1576     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1577     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1578     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1579     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1580     /* build cache for entries destined for other processes, formed during assembly */
1581     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1582   }
1583 
1584   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1585   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1586   B->preallocated = PETSC_TRUE;
1587   PetscFunctionReturn(0);
1588 }
1589 EXTERN_C_END
1590 
1591 EXTERN_C_BEGIN
1592 #if defined(PETSC_HAVE_MUMPS)
1593 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1594 #endif
1595 #if defined(PETSC_HAVE_SPOOLES)
1596 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1597 #endif
1598 #if defined(PETSC_HAVE_PASTIX)
1599 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1600 #endif
1601 EXTERN_C_END
1602 
1603 /*MC
1604    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1605    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1606 
1607    Options Database Keys:
1608 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1609 
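   Example - a minimal sketch of creating a matrix of this type (the global
   size M, block size bs and the counts d_nz/o_nz are placeholders; error
   checking omitted):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
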
1610   Level: beginner
1611 
1612 .seealso: MatCreateMPISBAIJ
1613 M*/
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatCreate_MPISBAIJ"
1618 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1619 {
1620   Mat_MPISBAIJ   *b;
1621   PetscErrorCode ierr;
1622   PetscTruth     flg;
1623 
1624   PetscFunctionBegin;
1625 
1626   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1627   B->data = (void*)b;
1628   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1629 
1630   B->ops->destroy    = MatDestroy_MPISBAIJ;
1631   B->ops->view       = MatView_MPISBAIJ;
1632   B->mapping         = 0;
1633   B->assembled       = PETSC_FALSE;
1634 
1635   B->insertmode = NOT_SET_VALUES;
1636   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1637   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1638 
1639   /* build local table of row and column ownerships */
1640   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1641 
1642   /* build cache for entries destined for other processes, formed during assembly */
1643   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1644   b->donotstash  = PETSC_FALSE;
1645   b->colmap      = PETSC_NULL;
1646   b->garray      = PETSC_NULL;
1647   b->roworiented = PETSC_TRUE;
1648 
1649   /* stuff used in block assembly */
1650   b->barray       = 0;
1651 
1652   /* stuff used for matrix vector multiply */
1653   b->lvec         = 0;
1654   b->Mvctx        = 0;
1655   b->slvec0       = 0;
1656   b->slvec0b      = 0;
1657   b->slvec1       = 0;
1658   b->slvec1a      = 0;
1659   b->slvec1b      = 0;
1660   b->sMvctx       = 0;
1661 
1662   /* stuff for MatGetRow() */
1663   b->rowindices   = 0;
1664   b->rowvalues    = 0;
1665   b->getrowactive = PETSC_FALSE;
1666 
1667   /* hash table stuff */
1668   b->ht           = 0;
1669   b->hd           = 0;
1670   b->ht_size      = 0;
1671   b->ht_flag      = PETSC_FALSE;
1672   b->ht_fact      = 0;
1673   b->ht_total_ct  = 0;
1674   b->ht_insert_ct = 0;
1675 
1676   b->in_loc       = 0;
1677   b->v_loc        = 0;
1678   b->n_loc        = 0;
1679   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1680     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1681     if (flg) {
1682       PetscReal fact = 1.39;
1683       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1684       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1685       if (fact <= 1.0) fact = 1.39;
1686       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1687       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1688     }
1689   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1690 
1691 #if defined(PETSC_HAVE_PASTIX)
1692   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_pastix_C",
1693 					   "MatGetFactor_mpisbaij_pastix",
1694 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1695 #endif
1696 #if defined(PETSC_HAVE_MUMPS)
1697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_mumps_C",
1698                                      "MatGetFactor_mpisbaij_mumps",
1699                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1700 #endif
1701 #if defined(PETSC_HAVE_SPOOLES)
1702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_spooles_C",
1703                                      "MatGetFactor_mpisbaij_spooles",
1704                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1705 #endif
1706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1707                                      "MatStoreValues_MPISBAIJ",
1708                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1710                                      "MatRetrieveValues_MPISBAIJ",
1711                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1713                                      "MatGetDiagonalBlock_MPISBAIJ",
1714                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1715   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1716                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1717                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1718   B->symmetric                  = PETSC_TRUE;
1719   B->structurally_symmetric     = PETSC_TRUE;
1720   B->symmetric_set              = PETSC_TRUE;
1721   B->structurally_symmetric_set = PETSC_TRUE;
1722   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 /*MC
1728    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1729 
1730    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1731    and MATMPISBAIJ otherwise.
1732 
1733    Options Database Keys:
1734 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1735 
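   Example - a sketch that lets the options database choose the type (assumes
   the program is run with -mat_type sbaij; M is a placeholder):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetFromOptions(A);
.ve
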
1736   Level: beginner
1737 
1738 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1739 M*/
1740 
1741 EXTERN_C_BEGIN
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCreate_SBAIJ"
1744 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1745 {
1746   PetscErrorCode ierr;
1747   PetscMPIInt    size;
1748 
1749   PetscFunctionBegin;
1750   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1751   if (size == 1) {
1752     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1755   }
1756   PetscFunctionReturn(0);
1757 }
1758 EXTERN_C_END
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1762 /*@C
1763    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1764    the user should preallocate the matrix storage by setting the parameters
1765    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1766    performance can be increased by more than a factor of 50.
1767 
1768    Collective on Mat
1769 
1770    Input Parameters:
1771 +  A - the matrix
1772 .  bs   - size of block
1773 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1774            submatrix  (same for all local rows)
1775 .  d_nnz - array containing the number of block nonzeros in the upper triangular
1776            and diagonal part of each block row of the diagonal portion of the local
1777            submatrix (possibly different for each block row), or PETSC_NULL.  You must
1778            leave room for the diagonal entry even if it is zero.
1779 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1780            submatrix (same for all local rows).
1781 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1782            off-diagonal portion of the local submatrix (possibly different for
1783            each block row) or PETSC_NULL.
1784 
1785 
1786    Options Database Keys:
1787 +   -mat_no_unroll - uses code that does not unroll the loops in the
1788                      block calculations (much slower)
1789 -   -mat_block_size - size of the blocks to use
1790 
1791    Notes:
1792 
1793    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1794    then it must be used on all processors that share the object for that argument.
1795 
1796    If the *_nnz parameter is given then the *_nz parameter is ignored
1797 
1798    Storage Information:
1799    For a square global matrix we define each processor's diagonal portion
1800    to be its local rows and the corresponding columns (a square submatrix);
1801    each processor's off-diagonal portion encompasses the remainder of the
1802    local matrix (a rectangular submatrix).
1803 
1804    The user can specify preallocated storage for the diagonal part of
1805    the local submatrix with either d_nz or d_nnz (not both).  Set
1806    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1807    memory allocation.  Likewise, specify preallocated storage for the
1808    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1809 
1810    You can call MatGetInfo() to get information on how effective the preallocation was;
1811    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1812    You can also run with the option -info and look for messages with the string
1813    malloc in them to see if additional memory allocation was needed.
1814 
1815    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1816    the figure below we depict these three local rows and all columns (0-11).
1817 
1818 .vb
1819            0 1 2 3 4 5 6 7 8 9 10 11
1820           -------------------
1821    row 3  |  o o o d d d o o o o o o
1822    row 4  |  o o o d d d o o o o o o
1823    row 5  |  o o o d d d o o o o o o
1824           -------------------
1825 .ve
1826 
1827    Thus, any entries in the d locations are stored in the d (diagonal)
1828    submatrix, and any entries in the o locations are stored in the
1829    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1830    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1831 
1832    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1833    plus the diagonal part of the d matrix,
1834    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1835    In general, for PDE problems in which most nonzeros are near the diagonal,
1836    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1837    or you will get TERRIBLE performance; see the users' manual chapter on
1838    matrices.
1839 
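   Example - a hedged sketch of preallocating for the block row depicted above
   (assumes bs=3, so rows 3-5 form the single local block row; since only the
   upper triangle is stored, block columns left of the diagonal are not counted):
.vb
   PetscInt d_nnz[1] = {1};  /* the diagonal block itself (cols 3-5) */
   PetscInt o_nnz[1] = {2};  /* block cols 6-8 and 9-11, right of the diagonal */
   MatMPISBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);
.ve
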
1840    Level: intermediate
1841 
1842 .keywords: matrix, block, aij, compressed row, sparse, parallel
1843 
1844 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1845 @*/
1846 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1847 {
1848   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1849 
1850   PetscFunctionBegin;
1851   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1852   if (f) {
1853     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatCreateMPISBAIJ"
1860 /*@C
1861    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1862    (block compressed row).  For good matrix assembly performance
1863    the user should preallocate the matrix storage by setting the parameters
1864    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1865    performance can be increased by more than a factor of 50.
1866 
1867    Collective on MPI_Comm
1868 
1869    Input Parameters:
1870 +  comm - MPI communicator
1871 .  bs   - size of block
1872 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1873            This value should be the same as the local size used in creating the
1874            y vector for the matrix-vector product y = Ax.
1875 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1876            This value should be the same as the local size used in creating the
1877            x vector for the matrix-vector product y = Ax.
1878 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1879 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1880 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1881            submatrix  (same for all local rows)
1882 .  d_nnz - array containing the number of block nonzeros in the upper triangular
1883            portion of each block row of the diagonal portion of the local submatrix
1884            (possibly different for each block row), or PETSC_NULL.
1885            You must leave room for the diagonal entry even if it is zero.
1886 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1887            submatrix (same for all local rows).
1888 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1889            off-diagonal portion of the local submatrix (possibly different for
1890            each block row) or PETSC_NULL.
1891 
1892    Output Parameter:
1893 .  A - the matrix
1894 
1895    Options Database Keys:
1896 +   -mat_no_unroll - uses code that does not unroll the loops in the
1897                      block calculations (much slower)
1898 .   -mat_block_size - size of the blocks to use
1899 -   -mat_mpi - use the parallel matrix data structures even on one processor
1900                (defaults to using SeqBAIJ format on one processor)
1901 
1902    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1903    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1904    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1905 
1906    Notes:
1907    The number of rows and columns must be divisible by blocksize.
1908    This matrix type does not support complex Hermitian operations.
1909 
1910    The user MUST specify either the local or global matrix dimensions
1911    (possibly both).
1912 
1913    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1914    then it must be used on all processors that share the object for that argument.
1915 
1916    If the *_nnz parameter is given then the *_nz parameter is ignored
1917 
1918    Storage Information:
1919    For a square global matrix we define each processor's diagonal portion
1920    to be its local rows and the corresponding columns (a square submatrix);
1921    each processor's off-diagonal portion encompasses the remainder of the
1922    local matrix (a rectangular submatrix).
1923 
1924    The user can specify preallocated storage for the diagonal part of
1925    the local submatrix with either d_nz or d_nnz (not both).  Set
1926    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1927    memory allocation.  Likewise, specify preallocated storage for the
1928    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1929 
1930    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1931    the figure below we depict these three local rows and all columns (0-11).
1932 
1933 .vb
1934            0 1 2 3 4 5 6 7 8 9 10 11
1935           -------------------
1936    row 3  |  o o o d d d o o o o o o
1937    row 4  |  o o o d d d o o o o o o
1938    row 5  |  o o o d d d o o o o o o
1939           -------------------
1940 .ve
1941 
1942    Thus, any entries in the d locations are stored in the d (diagonal)
1943    submatrix, and any entries in the o locations are stored in the
1944    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1945    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1946 
1947    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1948    plus the diagonal part of the d matrix,
1949    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1950    In general, for PDE problems in which most nonzeros are near the diagonal,
1951    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1952    or you will get TERRIBLE performance; see the users' manual chapter on
1953    matrices.
1954 
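   Example - a minimal sketch (bs=2; the global size M and the per-block-row
   counts 5 and 2 are placeholders, not recommendations):
.vb
   Mat A;
   MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve
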
1955    Level: intermediate
1956 
1957 .keywords: matrix, block, aij, compressed row, sparse, parallel
1958 
1959 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1960 @*/
1961 
1962 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1963 {
1964   PetscErrorCode ierr;
1965   PetscMPIInt    size;
1966 
1967   PetscFunctionBegin;
1968   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1969   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1970   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1971   if (size > 1) {
1972     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1973     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1974   } else {
1975     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1976     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1977   }
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 
1982 #undef __FUNCT__
1983 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1984 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1985 {
1986   Mat            mat;
1987   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1988   PetscErrorCode ierr;
1989   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
1990   PetscScalar    *array;
1991 
1992   PetscFunctionBegin;
1993   *newmat       = 0;
1994   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
1995   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
1996   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
1997   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1998   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
1999   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2000 
2001   mat->factor       = matin->factor;
2002   mat->preallocated = PETSC_TRUE;
2003   mat->assembled    = PETSC_TRUE;
2004   mat->insertmode   = NOT_SET_VALUES;
2005 
2006   a = (Mat_MPISBAIJ*)mat->data;
2007   a->bs2   = oldmat->bs2;
2008   a->mbs   = oldmat->mbs;
2009   a->nbs   = oldmat->nbs;
2010   a->Mbs   = oldmat->Mbs;
2011   a->Nbs   = oldmat->Nbs;
2012 
2013 
2014   a->size         = oldmat->size;
2015   a->rank         = oldmat->rank;
2016   a->donotstash   = oldmat->donotstash;
2017   a->roworiented  = oldmat->roworiented;
2018   a->rowindices   = 0;
2019   a->rowvalues    = 0;
2020   a->getrowactive = PETSC_FALSE;
2021   a->barray       = 0;
2022   a->rstartbs    = oldmat->rstartbs;
2023   a->rendbs      = oldmat->rendbs;
2024   a->cstartbs    = oldmat->cstartbs;
2025   a->cendbs      = oldmat->cendbs;
2026 
2027   /* hash table stuff */
2028   a->ht           = 0;
2029   a->hd           = 0;
2030   a->ht_size      = 0;
2031   a->ht_flag      = oldmat->ht_flag;
2032   a->ht_fact      = oldmat->ht_fact;
2033   a->ht_total_ct  = 0;
2034   a->ht_insert_ct = 0;
2035 
2036   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2037   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2038   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2039   if (oldmat->colmap) {
2040 #if defined (PETSC_USE_CTABLE)
2041     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2042 #else
2043     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2044     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2045     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2046 #endif
2047   } else a->colmap = 0;
2048 
2049   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2050     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2051     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2052     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2053   } else a->garray = 0;
2054 
2055   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2056   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2057   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2058   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2059 
2060   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2061   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2062   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2063   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2064 
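  /* slvec1 stores [local | ghost] entries in one contiguous array; slvec1a,
     slvec1b and slvec0b created below are sequential views that alias that
     storage rather than holding copies. */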
2065   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2066   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2067   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2068   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2069   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2070   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2071   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2072   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2073   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2074   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2075   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2076   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2077   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2078 
2079   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet; instead take a reference to the original scatter: */
2080   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2081   a->sMvctx = oldmat->sMvctx;
2082   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2083 
2084   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2085   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2086   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2087   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2088   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2089   *newmat = mat;
2090   PetscFunctionReturn(0);
2091 }
2092 
2093 #include "petscsys.h"
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatLoad_MPISBAIJ"
2097 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2098 {
2099   Mat            A;
2100   PetscErrorCode ierr;
2101   PetscInt       i,nz,j,rstart,rend;
2102   PetscScalar    *vals,*buf;
2103   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2104   MPI_Status     status;
2105   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2106   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2107   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2108   PetscInt       bs=1,Mbs,mbs,extra_rows;
2109   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2110   PetscInt       dcount,kmax,k,nzcount,tmp;
2111   int            fd;
2112 
2113   PetscFunctionBegin;
2114   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2115     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2116   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2117 
2118   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2119   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2120   if (!rank) {
2121     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2122     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2123     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2124     if (header[3] < 0) {
2125       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2126     }
2127   }
2128 
2129   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2130   M = header[1]; N = header[2];
2131 
2132   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2133 
2134   /*
2135      This code adds extra rows to make sure the number of rows is
2136      divisible by the blocksize
2137   */
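  /* For example (illustration only): M = 10 and bs = 3 give Mbs = 3 and
     extra_rows = 3 - 10 + 9 = 2, so Mbs becomes 4 and the two padding rows
     are later given a single unit entry on the diagonal. */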
2138   Mbs        = M/bs;
2139   extra_rows = bs - M + bs*(Mbs);
2140   if (extra_rows == bs) extra_rows = 0;
2141   else                  Mbs++;
2142   if (extra_rows && !rank) {
2143     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2144   }
2145 
2146   /* determine ownership of all rows */
2147   mbs        = Mbs/size + ((Mbs % size) > rank);
2148   m          = mbs*bs;
2149   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2150   browners   = rowners + size + 1;
2151   mmbs       = PetscMPIIntCast(mbs);
2152   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2153   rowners[0] = 0;
2154   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2155   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2156   rstart = rowners[rank];
2157   rend   = rowners[rank+1];
2158 
2159   /* distribute row lengths to all processors */
2160   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2161   if (!rank) {
2162     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2163     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2164     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2165     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2166     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2167     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2168     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2169   } else {
2170     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2171   }
2172 
2173   if (!rank) {   /* procs[0] */
2174     /* calculate the number of nonzeros on each processor */
2175     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2176     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2177     for (i=0; i<size; i++) {
2178       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2179         procsnz[i] += rowlengths[j];
2180       }
2181     }
2182     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2183 
2184     /* determine max buffer needed and allocate it */
2185     maxnz = 0;
2186     for (i=0; i<size; i++) {
2187       maxnz = PetscMax(maxnz,procsnz[i]);
2188     }
2189     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2190 
2191     /* read in my part of the matrix column indices  */
2192     nz     = procsnz[0];
2193     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2194     mycols = ibuf;
2195     if (size == 1)  nz -= extra_rows;
2196     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2197     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2198 
2199     /* read in the column indices for all other processes (except the last) and ship them off */
2200     for (i=1; i<size-1; i++) {
2201       nz   = procsnz[i];
2202       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2203       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2204     }
2205     /* read in the part for the last process */
2206     if (size != 1) {
2207       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2208       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2209       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2210       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2211     }
2212     ierr = PetscFree(cols);CHKERRQ(ierr);
2213   } else {  /* procs[i], i>0 */
2214     /* determine buffer space needed for message */
2215     nz = 0;
2216     for (i=0; i<m; i++) {
2217       nz += locrowlens[i];
2218     }
2219     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2220     mycols = ibuf;
2221     /* receive message of column indices */
2222     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2223     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2224     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2225   }
2226 
2227   /* loop over local rows, determining number of off diagonal entries */
2228   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2229   odlens   = dlens + (rend-rstart);
2230   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2231   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2232   masked1  = mask    + Mbs;
2233   masked2  = masked1 + Mbs;
2234   rowcount = 0; nzcount = 0;
2235   for (i=0; i<mbs; i++) {
2236     dcount  = 0;
2237     odcount = 0;
2238     for (j=0; j<bs; j++) {
2239       kmax = locrowlens[rowcount];
2240       for (k=0; k<kmax; k++) {
2241         tmp = mycols[nzcount++]/bs; /* block col. index */
2242         if (!mask[tmp]) {
2243           mask[tmp] = 1;
2244           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2245           else masked1[dcount++] = tmp; /* entry in diag portion */
2246         }
2247       }
2248       rowcount++;
2249     }
2250 
2251     dlens[i]  = dcount;  /* d_nzz[i] */
2252     odlens[i] = odcount; /* o_nzz[i] */
2253 
2254     /* zero out the mask elements we set */
2255     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2256     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2257   }
2258 
2259   /* create our matrix */
2260   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2261   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2262   ierr = MatSetType(A,type);CHKERRQ(ierr);
2263   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2264   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2265 
2266   if (!rank) {
2267     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2268     /* read in my part of the matrix numerical values  */
2269     nz = procsnz[0];
2270     vals = buf;
2271     mycols = ibuf;
2272     if (size == 1)  nz -= extra_rows;
2273     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2274     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2275 
2276     /* insert into matrix */
2277     jj      = rstart*bs;
2278     for (i=0; i<m; i++) {
2279       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2280       mycols += locrowlens[i];
2281       vals   += locrowlens[i];
2282       jj++;
2283     }
2284 
2285     /* read in other processors' values (except the last one's) and ship them out */
2286     for (i=1; i<size-1; i++) {
2287       nz   = procsnz[i];
2288       vals = buf;
2289       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2290       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2291     }
2292     /* the last proc */
2293     if (size != 1){
2294       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 after the loop above; the extra rows are not on disk */
2295       vals = buf;
2296       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2297       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2298       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2299     }
2300     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2301 
2302   } else {
2303     /* receive numeric values */
2304     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2305 
2306     /* receive message of values */
2307     vals   = buf;
2308     mycols = ibuf;
2309     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2310     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2311     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2312 
2313     /* insert into matrix */
2314     jj      = rstart*bs;
2315     for (i=0; i<m; i++) {
2316       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2317       mycols += locrowlens[i];
2318       vals   += locrowlens[i];
2319       jj++;
2320     }
2321   }
2322 
2323   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2324   ierr = PetscFree(buf);CHKERRQ(ierr);
2325   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2326   ierr = PetscFree(rowners);CHKERRQ(ierr);
2327   ierr = PetscFree(dlens);CHKERRQ(ierr);
2328   ierr = PetscFree(mask);CHKERRQ(ierr);
2329   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2330   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331   *newmat = A;
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2337 /*XXXXX@
2338    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2339 
2340    Input Parameters:
2341 .  mat  - the matrix
2342 .  fact - factor
2343 
2344    Collective on Mat
2345 
2346    Level: advanced
2347 
2348   Notes:
2349    This can also be set by the command line option: -mat_use_hash_table fact
2350 
2351 .keywords: matrix, hashtable, factor, HT
2352 
2353 .seealso: MatSetOption()
2354 @XXXXX*/
2355 
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2359 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2360 {
2361   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2362   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2363   PetscReal      atmp;
2364   PetscReal      *work,*svalues,*rvalues;
2365   PetscErrorCode ierr;
2366   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2367   PetscMPIInt    rank,size;
2368   PetscInt       *rowners_bs,dest,count,source;
2369   PetscScalar    *va;
2370   MatScalar      *ba;
2371   MPI_Status     stat;
2372 
2373   PetscFunctionBegin;
2374   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2375   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2376   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2377 
2378   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2379   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2380 
2381   bs   = A->rmap->bs;
2382   mbs  = a->mbs;
2383   Mbs  = a->Mbs;
2384   ba   = b->a;
2385   bi   = b->i;
2386   bj   = b->j;
2387 
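  /* Only the upper triangle is stored, so each |entry| of B updates the row
     max of its local row (in va) and, by symmetry, the row max of the row
     owned by a higher rank that matches its global column; the latter are
     accumulated in work[] and shipped to the owning processes below. */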
2388   /* find ownerships */
2389   rowners_bs = A->rmap->range;
2390 
2391   /* each proc creates an array to be distributed */
2392   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2393   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2394 
2395   /* row_max for B */
2396   if (rank != size-1){
2397     for (i=0; i<mbs; i++) {
2398       ncols = bi[1] - bi[0]; bi++;
2399       brow  = bs*i;
2400       for (j=0; j<ncols; j++){
2401         bcol = bs*(*bj);
2402         for (kcol=0; kcol<bs; kcol++){
2403           col = bcol + kcol;                 /* local col index */
2404           col += rowners_bs[rank+1];      /* global col index */
2405           for (krow=0; krow<bs; krow++){
2406             atmp = PetscAbsScalar(*ba); ba++;
2407             row = brow + krow;    /* local row index */
2408             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2409             if (work[col] < atmp) work[col] = atmp;
2410           }
2411         }
2412         bj++;
2413       }
2414     }
2415 
2416     /* send values to their owners */
2417     for (dest=rank+1; dest<size; dest++){
2418       svalues = work + rowners_bs[dest];
2419       count   = rowners_bs[dest+1]-rowners_bs[dest];
2420       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2421     }
2422   }
2423 
2424   /* receive values */
2425   if (rank){
2426     rvalues = work;
2427     count   = rowners_bs[rank+1]-rowners_bs[rank];
2428     for (source=0; source<rank; source++){
2429       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2430       /* process values */
2431       for (i=0; i<count; i++){
2432         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2433       }
2434     }
2435   }
2436 
2437   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2438   ierr = PetscFree(work);CHKERRQ(ierr);
2439   PetscFunctionReturn(0);
2440 }
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatRelax_MPISBAIJ"
2444 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2445 {
2446   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2447   PetscErrorCode ierr;
2448   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2449   PetscScalar    *x,*b,*ptr,*from;
2450   Vec            bb1;
2451 
2452   PetscFunctionBegin;
2453   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2454   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2455 
2456   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2457     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2458       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2459       its--;
2460     }
2461 
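    /* One iteration below computes, in effect, bb1 = bb - (off-diagonal part
       of the symmetric matrix)*xx using a single forward scatter: slvec0
       carries (-xx, -B^T*xx), the scatter adds the -B^T*xx pieces into the
       owners' slvec1a (preloaded with bb) and places the -xx ghost values in
       slvec1b, and the multadd folds in -B*xx.  A local symmetric sweep with
       the diagonal block A then updates xx. */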
2462     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2463     while (its--){
2464 
2465       /* lower triangular part: slvec0b = - B^T*xx */
2466       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2467 
2468       /* copy xx into slvec0a */
2469       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2470       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2471       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2472       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2473 
2474       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2475 
2476       /* copy bb into slvec1a */
2477       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2478       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2479       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2480       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2481 
2482       /* set slvec1b = 0 */
2483       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2484 
2485       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2486       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2487       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2488       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2489 
2490       /* upper triangular part: bb1 = bb1 - B*x */
2491       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2492 
2493       /* local diagonal sweep */
2494       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2495     }
2496     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2497   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2498     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2499   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2500     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2501   } else if (flag & SOR_EISENSTAT) {
2502     Vec               xx1;
2503     PetscTruth        hasop;
2504     const PetscScalar *diag;
2505     PetscScalar       *sl;
2506     PetscInt          i,n;
2507 
2508     if (!mat->xx1) {
2509       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2510       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2511     }
2512     xx1 = mat->xx1;
2513     bb1 = mat->bb1;
2514 
2515     ierr = (*mat->A->ops->relax)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2516 
2517     if (!mat->diag) {
2518       /* this is wrong for same matrix with new nonzero values */
2519       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2520       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2521     }
2522     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2523 
2524     if (hasop) {
2525       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2526       ierr = VecAYPX(mat->slvec1a,-1.0,bb);CHKERRQ(ierr);
2527     } else {
2528       /*
2529           These two lines are replaced by code that may be a bit faster for a good compiler
2530       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2531       ierr = VecAYPX(mat->slvec1a,-1.0,bb);CHKERRQ(ierr);
2532       */
2533       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2534       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2535       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2536       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2537       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2538       for (i=0; i<n; i++) {
2539         sl[i] = b[i] - diag[i]*x[i];
2540       }
2541       ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2542       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2543       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2544       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2545       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2546     }
2547 
2548     /* multiply off-diagonal portion of matrix */
2549     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2550     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2551     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2552     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2553     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2554     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2555     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2556     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2557     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2558     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2559 
2560     /* local sweep */
2561     ierr = (*mat->A->ops->relax)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2562     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2563   } else {
2564     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2565   }
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2571 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2572 {
2573   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2574   PetscErrorCode ierr;
2575   Vec            lvec1,bb1;
2576 
2577   PetscFunctionBegin;
2578   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2579   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2580 
2581   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2582     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2583       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2584       its--;
2585     }
2586 
2587     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2588     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2589     while (its--){
2590       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2591 
2592       /* lower triangular part: bb1 = bb - B^T*xx */
2593       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2594       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2595 
2596       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2597       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2598       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2599 
2600       /* upper triangular part: bb1 = bb1 - B*x */
2601       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2602       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2603 
2604       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2605 
2606       /* diagonal sweep */
2607       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2608     }
2609     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2610     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2611   } else {
2612     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2613   }
2614   PetscFunctionReturn(0);
2615 }
2616 
2617