/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c  (revision 547795f9f637a72c8a9fe35eb9ea2bca0ad69cfa) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
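
/*
   Usage sketch (not part of the original file): MatStoreValues()/
   MatRetrieveValues() save and restore the numerical values of both the
   diagonal (A) and off-diagonal (B) parts, as the two routines above do
   for each part.  The nonzero pattern must be frozen first.  The helper
   name SBAIJSketch_StoreRetrieve is hypothetical.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_StoreRetrieve"
static PetscErrorCode SBAIJSketch_StoreRetrieve(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the current values */
  ierr = MatZeroEntries(mat);CHKERRQ(ierr);    /* values may now be modified freely */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot */
  PetscFunctionReturn(0);
}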


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
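
/*
   The two macros above locate the target block column with a truncated
   binary search: bisection narrows the window [low,high) to at most a few
   candidates, and a short linear scan finishes the lookup, which is cheap
   for the short rows typical of sparse matrices.  A standalone sketch of
   the same search pattern (hypothetical helper, not used in this file):
*/
static PetscInt SBAIJSketch_FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {            /* bisect until the window is small */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {        /* finish with a linear scan */
    if (rp[i] > bcol) break;        /* columns are sorted: bcol is absent */
    if (rp[i] == bcol) return i;    /* existing block found */
  }
  return -1;                        /* caller must insert a new block */
}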

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
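
/*
   Usage sketch for the routine above (hypothetical helper): only entries
   whose block row is <= the block column are stored, so a caller that
   naturally generates both triangles should set MAT_IGNORE_LOWER_TRIANGULAR
   or filter the lower-triangular entries itself.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_SetSymmetricPair"
static PetscErrorCode SBAIJSketch_SetSymmetricPair(Mat mat,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* kept if block(i) <= block(j) */
  ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* the lower-triangular mirror is dropped */
  PetscFunctionReturn(0);
}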

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row-oriented) or one block row
           (column-oriented), the input block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
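
/*
   Usage sketch (hypothetical helper): inserting one bs x bs block by its
   global block indices.  With row-oriented storage the routine above
   expects the block values in row-major order.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_SetOneBlock"
static PetscErrorCode SBAIJSketch_SetOneBlock(Mat mat,PetscInt bi,PetscInt bj,const PetscScalar *block)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* block must hold bs*bs values, where bs is the block size of mat */
  ierr = MatSetValuesBlocked(mat,1,&bi,1,&bj,block,INSERT_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}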

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
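
/*
   Sketch of the local-only contract enforced above (hypothetical helper):
   MatGetValues() on a parallel SBAIJ matrix may only query rows owned by
   the calling process.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_GetLocalEntry"
static PetscErrorCode SBAIJSketch_GetLocalEntry(Mat mat,PetscInt i,PetscInt j,PetscScalar *v)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  if (i < rstart || i >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row not owned by this process");
  ierr = MatGetValues(mat,1,&i,1,&j,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}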

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap->N;
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
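
/*
   Note on the Frobenius branch above: the blocks of B are stored once but
   appear twice in the full symmetric matrix (once transposed), hence
   norm = sqrt(norm(A)^2 + 2*norm(B)^2).  A minimal usage sketch
   (hypothetical helper):
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_Frobenius"
static PetscErrorCode SBAIJSketch_Frobenius(Mat mat,PetscReal *nrm)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatNorm(mat,NORM_FROBENIUS,nrm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}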

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily switch to column-oriented insertion and restore the original
       flags after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* set up Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
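
/*
   Sketch of the two-phase assembly these routines implement (hypothetical
   helper): off-process entries cached by MatSetValues() are communicated
   in MatAssemblyBegin() and drained back through MatSetValues_MPISBAIJ()
   and MatSetValuesBlocked_MPISBAIJ() in MatAssemblyEnd().
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_Assemble"
static PetscErrorCode SBAIJSketch_Assemble(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ... MatSetValues()/MatSetValuesBlocked() calls, possibly off-process ... */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}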

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}
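
/*
   Usage sketch (hypothetical helper): viewing through the standard ASCII
   viewer; the INFO and INFO_DETAIL formats take the branches implemented
   above.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_View"
static PetscErrorCode SBAIJSketch_View(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}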

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
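
/*
   The multiply above exploits symmetry with a single scatter: slvec0 packs
   the local part of x together with B^T x, one forward ADD_VALUES scatter
   through sMvctx simultaneously gathers ghost values and accumulates the
   subdiagonal contributions, and a final multadd on B adds the
   superdiagonal part.  Usage sketch (hypothetical helper):
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_Mult"
static PetscErrorCode SBAIJSketch_Mult(Mat mat,Vec x,Vec y)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(mat,x,y);CHKERRQ(ierr); /* dispatches to MatMult_MPISBAIJ */
  PetscFunctionReturn(0);
}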

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
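
/*
   Sketch of the required call sequence for row access on an SBAIJ matrix
   (hypothetical helper): the upper-triangular guard above must bracket the
   MatGetRow()/MatRestoreRow() pair.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_RowAccess"
static PetscErrorCode SBAIJSketch_RowAccess(Mat mat,PetscInt row)
{
  PetscErrorCode    ierr;
  PetscInt          nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  /* ... use the nz column indices and values of the stored upper part ... */
  ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}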

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
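
/*
   Sketch of the symmetric scaling contract enforced above (hypothetical
   helper): the same vector must be passed on both sides so that D A D
   remains symmetric.
*/
#undef __FUNCT__
#define __FUNCT__ "SBAIJSketch_SymmetricScale"
static PetscErrorCode SBAIJSketch_SymmetricScale(Mat mat,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr); /* ll and rr must be equal */
  PetscFunctionReturn(0);
}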

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPISBAIJ"
1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1349   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1350 
1351   PetscFunctionBegin;
1352   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1353   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1354     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1355     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1359     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1367 {
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
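
/*
   Note on the call above: a negative block size is this file's convention for
   "a default that MatMPISBAIJSetPreallocation_MPISBAIJ() may still override
   from the -mat_block_size option"; see the bs < 0 branch of that routine below.
*/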
1374 
1375 #include "petscblaslapack.h"
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
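
/*
   Illustrative usage sketch (not part of the original file): with
   SAME_NONZERO_PATTERN the routine above reduces Y = Y + a*X to one BLAS axpy
   over each of the two stored value arrays; any other MatStructure takes the
   row-by-row MatAXPY_Basic() path.

.vb
   ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       // fast path
   ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  // basic path
.ve
*/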
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1407 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1408 {
1409   PetscErrorCode ierr;
1410   PetscInt       i;
1411   PetscTruth     flg;
1412 
1413   PetscFunctionBegin;
1414   for (i=0; i<n; i++) {
1415     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1416     if (!flg) {
1417       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1418     }
1419   }
1420   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps_Values = {
1427        MatSetValues_MPISBAIJ,
1428        MatGetRow_MPISBAIJ,
1429        MatRestoreRow_MPISBAIJ,
1430        MatMult_MPISBAIJ,
1431 /* 4*/ MatMultAdd_MPISBAIJ,
1432        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1433        MatMultAdd_MPISBAIJ,
1434        0,
1435        0,
1436        0,
1437 /*10*/ 0,
1438        0,
1439        0,
1440        MatSOR_MPISBAIJ,
1441        MatTranspose_MPISBAIJ,
1442 /*15*/ MatGetInfo_MPISBAIJ,
1443        MatEqual_MPISBAIJ,
1444        MatGetDiagonal_MPISBAIJ,
1445        MatDiagonalScale_MPISBAIJ,
1446        MatNorm_MPISBAIJ,
1447 /*20*/ MatAssemblyBegin_MPISBAIJ,
1448        MatAssemblyEnd_MPISBAIJ,
1449        MatSetOption_MPISBAIJ,
1450        MatZeroEntries_MPISBAIJ,
1451 /*24*/ 0,
1452        0,
1453        0,
1454        0,
1455        0,
1456 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1457        0,
1458        0,
1459        0,
1460        0,
1461 /*34*/ MatDuplicate_MPISBAIJ,
1462        0,
1463        0,
1464        0,
1465        0,
1466 /*39*/ MatAXPY_MPISBAIJ,
1467        MatGetSubMatrices_MPISBAIJ,
1468        MatIncreaseOverlap_MPISBAIJ,
1469        MatGetValues_MPISBAIJ,
1470        MatCopy_MPISBAIJ,
1471 /*44*/ 0,
1472        MatScale_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476 /*49*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*54*/ 0,
1482        0,
1483        MatSetUnfactored_MPISBAIJ,
1484        0,
1485        MatSetValuesBlocked_MPISBAIJ,
1486 /*59*/ 0,
1487        0,
1488        0,
1489        0,
1490        0,
1491 /*64*/ 0,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*74*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*79*/ 0,
1507        0,
1508        0,
1509        0,
1510        MatLoad_MPISBAIJ,
1511 /*84*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*89*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*94*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*99*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*104*/0,
1532        MatRealPart_MPISBAIJ,
1533        MatImaginaryPart_MPISBAIJ,
1534        MatGetRowUpperTriangular_MPISBAIJ,
1535        MatRestoreRowUpperTriangular_MPISBAIJ
1536 };
1537 
1538 
1539 EXTERN_C_BEGIN
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1542 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1543 {
1544   PetscFunctionBegin;
1545   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1546   *iscopy = PETSC_FALSE;
1547   PetscFunctionReturn(0);
1548 }
1549 EXTERN_C_END
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1554 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1555 {
1556   Mat_MPISBAIJ   *b;
1557   PetscErrorCode ierr;
1558   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1559 
1560   PetscFunctionBegin;
1561   if (bs < 0){
1562     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1563       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1564     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1565     bs   = PetscAbs(bs);
1566   }
1567   if ((d_nnz || o_nnz) && newbs != bs) {
1568     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1569   }
1570   bs = newbs;
1571 
1572   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1573   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1574   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1575   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1576 
1577   B->rmap->bs = B->cmap->bs = bs;
1578   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1579   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1580 
1581   if (d_nnz) {
1582     for (i=0; i<B->rmap->n/bs; i++) {
1583       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1584     }
1585   }
1586   if (o_nnz) {
1587     for (i=0; i<B->rmap->n/bs; i++) {
1588       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1589     }
1590   }
1591 
1592   b   = (Mat_MPISBAIJ*)B->data;
1593   mbs = B->rmap->n/bs;
1594   Mbs = B->rmap->N/bs;
1595   if (mbs*bs != B->rmap->n) {
1596     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1597   }
1598 
1599   B->rmap->bs  = bs;
1600   b->bs2 = bs*bs;
1601   b->mbs = mbs;
1602   b->nbs = mbs;
1603   b->Mbs = Mbs;
1604   b->Nbs = Mbs;
1605 
1606   for (i=0; i<=b->size; i++) {
1607     b->rangebs[i] = B->rmap->range[i]/bs;
1608   }
1609   b->rstartbs = B->rmap->rstart/bs;
1610   b->rendbs   = B->rmap->rend/bs;
1611 
1612   b->cstartbs = B->cmap->rstart/bs;
1613   b->cendbs   = B->cmap->rend/bs;
1614 
1615   if (!B->preallocated) {
1616     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1617     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1618     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1619     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1622     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1624     /* build cache for off-processor entries formed during assembly */
1625     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1626   }
1627 
1628   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1629   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1630   B->preallocated = PETSC_TRUE;
1631   PetscFunctionReturn(0);
1632 }
1633 EXTERN_C_END
1634 
1635 EXTERN_C_BEGIN
1636 #if defined(PETSC_HAVE_MUMPS)
1637 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1638 #endif
1639 #if defined(PETSC_HAVE_SPOOLES)
1640 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1641 #endif
1642 #if defined(PETSC_HAVE_PASTIX)
1643 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1644 #endif
1645 EXTERN_C_END
1646 
1647 /*MC
1648    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1649    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1650 
1651    Options Database Keys:
1652 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1653 
1654   Level: beginner
1655 
1656 .seealso: MatCreateMPISBAIJ
1657 M*/
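
/*
   Illustrative usage sketch (not part of the original file): selecting this
   type at runtime through the options database.

.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);   // run with -mat_type mpisbaij
.ve
*/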
1658 
1659 EXTERN_C_BEGIN
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatCreate_MPISBAIJ"
1662 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1663 {
1664   Mat_MPISBAIJ   *b;
1665   PetscErrorCode ierr;
1666   PetscTruth     flg;
1667 
1668   PetscFunctionBegin;
1669 
1670   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1671   B->data = (void*)b;
1672   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1673 
1674   B->ops->destroy    = MatDestroy_MPISBAIJ;
1675   B->ops->view       = MatView_MPISBAIJ;
1676   B->mapping         = 0;
1677   B->assembled       = PETSC_FALSE;
1678 
1679   B->insertmode = NOT_SET_VALUES;
1680   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1681   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1682 
1683   /* build local table of row and column ownerships */
1684   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1685 
1686   /* build cache for off-processor entries formed during assembly */
1687   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1688   b->donotstash  = PETSC_FALSE;
1689   b->colmap      = PETSC_NULL;
1690   b->garray      = PETSC_NULL;
1691   b->roworiented = PETSC_TRUE;
1692 
1693   /* stuff used in block assembly */
1694   b->barray       = 0;
1695 
1696   /* stuff used for matrix vector multiply */
1697   b->lvec         = 0;
1698   b->Mvctx        = 0;
1699   b->slvec0       = 0;
1700   b->slvec0b      = 0;
1701   b->slvec1       = 0;
1702   b->slvec1a      = 0;
1703   b->slvec1b      = 0;
1704   b->sMvctx       = 0;
1705 
1706   /* stuff for MatGetRow() */
1707   b->rowindices   = 0;
1708   b->rowvalues    = 0;
1709   b->getrowactive = PETSC_FALSE;
1710 
1711   /* hash table stuff */
1712   b->ht           = 0;
1713   b->hd           = 0;
1714   b->ht_size      = 0;
1715   b->ht_flag      = PETSC_FALSE;
1716   b->ht_fact      = 0;
1717   b->ht_total_ct  = 0;
1718   b->ht_insert_ct = 0;
1719 
1720   b->in_loc       = 0;
1721   b->v_loc        = 0;
1722   b->n_loc        = 0;
1723   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1724     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1725     if (flg) {
1726       PetscReal fact = 1.39;
1727       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1728       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1729       if (fact <= 1.0) fact = 1.39;
1730       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1731       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1732     }
1733   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1734 
1735 #if defined(PETSC_HAVE_PASTIX)
1736   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1737 					   "MatGetFactor_mpisbaij_pastix",
1738 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1739 #endif
1740 #if defined(PETSC_HAVE_MUMPS)
1741   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1742                                      "MatGetFactor_mpisbaij_mumps",
1743                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1744 #endif
1745 #if defined(PETSC_HAVE_SPOOLES)
1746   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1747                                      "MatGetFactor_mpisbaij_spooles",
1748                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1749 #endif
1750   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1751                                      "MatStoreValues_MPISBAIJ",
1752                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1754                                      "MatRetrieveValues_MPISBAIJ",
1755                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1757                                      "MatGetDiagonalBlock_MPISBAIJ",
1758                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1760                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1761                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1762   B->symmetric                  = PETSC_TRUE;
1763   B->structurally_symmetric     = PETSC_TRUE;
1764   B->symmetric_set              = PETSC_TRUE;
1765   B->structurally_symmetric_set = PETSC_TRUE;
1766   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 EXTERN_C_END
1770 
1771 /*MC
1772    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1773 
1774    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1775    and MATMPISBAIJ otherwise.
1776 
1777    Options Database Keys:
1778 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1779 
1780   Level: beginner
1781 
1782 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1783 M*/
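
/*
   Illustrative usage sketch (not part of the original file): MATSBAIJ
   dispatches on the communicator size, as MatCreate_SBAIJ() below shows.

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr); // MATSEQSBAIJ on 1 process, else MATMPISBAIJ
.ve
*/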
1784 
1785 EXTERN_C_BEGIN
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatCreate_SBAIJ"
1788 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1789 {
1790   PetscErrorCode ierr;
1791   PetscMPIInt    size;
1792 
1793   PetscFunctionBegin;
1794   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1795   if (size == 1) {
1796     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 EXTERN_C_END
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1806 /*@C
1807    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1808    the user should preallocate the matrix storage by setting the parameters
1809    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1810    performance can be increased by more than a factor of 50.
1811 
1812    Collective on Mat
1813 
1814    Input Parameters:
1815 +  A - the matrix
1816 .  bs   - size of block
1817 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1818            submatrix  (same for all local rows)
1819 .  d_nnz - array containing the number of block nonzeros in the various block rows
1820            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1821            (possibly different for each block row) or PETSC_NULL.  You must leave room
1822            for the diagonal entry even if it is zero.
1823 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1824            submatrix (same for all local rows).
1825 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1826            off-diagonal portion of the local submatrix (possibly different for
1827            each block row) or PETSC_NULL.
1828 
1829 
1830    Options Database Keys:
1831 +   -mat_no_unroll - uses code that does not unroll the loops in the
1832                      block calculations (much slower)
1833 -   -mat_block_size - size of the blocks to use
1834 
1835    Notes:
1836 
1837    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1838    then it must be used on all processors that share the object for that argument.
1839 
1840    If the *_nnz parameter is given then the *_nz parameter is ignored
1841 
1842    Storage Information:
1843    For a square global matrix we define each processor's diagonal portion
1844    to be its local rows and the corresponding columns (a square submatrix);
1845    each processor's off-diagonal portion encompasses the remainder of the
1846    local matrix (a rectangular submatrix).
1847 
1848    The user can specify preallocated storage for the diagonal part of
1849    the local submatrix with either d_nz or d_nnz (not both).  Set
1850    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1851    memory allocation.  Likewise, specify preallocated storage for the
1852    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1853 
1854    You can call MatGetInfo() to get information on how effective the preallocation was;
1855    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1856    You can also run with the option -info and look for messages with the string
1857    malloc in them to see if additional memory allocation was needed.
1858 
1859    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1860    the figure below we depict these three local rows and all columns (0-11).
1861 
1862 .vb
1863            0 1 2 3 4 5 6 7 8 9 10 11
1864           -------------------
1865    row 3  |  o o o d d d o o o o o o
1866    row 4  |  o o o d d d o o o o o o
1867    row 5  |  o o o d d d o o o o o o
1868           -------------------
1869 .ve
1870 
1871    Thus, any entries in the d locations are stored in the d (diagonal)
1872    submatrix, and any entries in the o locations are stored in the
1873    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1874    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1875 
1876    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1877    plus the diagonal part of the d matrix,
1878    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1879    In general, for PDE problems in which most nonzeros are near the diagonal,
1880    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1881    or you will get TERRIBLE performance; see the users' manual chapter on
1882    matrices.
1883 
1884    Level: intermediate
1885 
1886 .keywords: matrix, block, aij, compressed row, sparse, parallel
1887 
1888 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1889 @*/
1890 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1891 {
1892   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1896   if (f) {
1897     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
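
/*
   Illustrative usage sketch (not part of the original file): preallocating a
   block-tridiagonal matrix with block size 2.  Only the upper triangle is
   stored, so each block row holds at most two blocks in the diagonal portion
   (the diagonal block plus its superdiagonal neighbor) and, at a process
   boundary, at most one block in the off-diagonal portion.

.vb
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
*/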
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatCreateMPISBAIJ"
1904 /*@C
1905    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1906    (block compressed row).  For good matrix assembly performance
1907    the user should preallocate the matrix storage by setting the parameters
1908    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1909    performance can be increased by more than a factor of 50.
1910 
1911    Collective on MPI_Comm
1912 
1913    Input Parameters:
1914 +  comm - MPI communicator
1915 .  bs   - size of block
1916 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1917            This value should be the same as the local size used in creating the
1918            y vector for the matrix-vector product y = Ax.
1919 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1920            This value should be the same as the local size used in creating the
1921            x vector for the matrix-vector product y = Ax.
1922 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1923 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1924 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1925            submatrix  (same for all local rows)
1926 .  d_nnz - array containing the number of block nonzeros in the various block rows
1927            in the upper triangular and diagonal part of the diagonal portion of the local
1928            submatrix (possibly different for each block row) or PETSC_NULL.
1929            You must leave room for the diagonal entry even if it is zero.
1930 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1931            submatrix (same for all local rows).
1932 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1933            off-diagonal portion of the local submatrix (possibly different for
1934            each block row) or PETSC_NULL.
1935 
1936    Output Parameter:
1937 .  A - the matrix
1938 
1939    Options Database Keys:
1940 +   -mat_no_unroll - uses code that does not unroll the loops in the
1941                      block calculations (much slower)
1942 .   -mat_block_size - size of the blocks to use
1943 -   -mat_mpi - use the parallel matrix data structures even on one processor
1944                (defaults to using SeqSBAIJ format on one processor)
1945 
1946    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1947    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1948    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1949 
1950    Notes:
1951    The number of rows and columns must be divisible by blocksize.
1952    This matrix type does not support complex Hermitian operations.
1953 
1954    The user MUST specify either the local or global matrix dimensions
1955    (possibly both).
1956 
1957    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1958    then it must be used on all processors that share the object for that argument.
1959 
1960    If the *_nnz parameter is given then the *_nz parameter is ignored
1961 
1962    Storage Information:
1963    For a square global matrix we define each processor's diagonal portion
1964    to be its local rows and the corresponding columns (a square submatrix);
1965    each processor's off-diagonal portion encompasses the remainder of the
1966    local matrix (a rectangular submatrix).
1967 
1968    The user can specify preallocated storage for the diagonal part of
1969    the local submatrix with either d_nz or d_nnz (not both).  Set
1970    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1971    memory allocation.  Likewise, specify preallocated storage for the
1972    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1973 
1974    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1975    the figure below we depict these three local rows and all columns (0-11).
1976 
1977 .vb
1978            0 1 2 3 4 5 6 7 8 9 10 11
1979           -------------------
1980    row 3  |  o o o d d d o o o o o o
1981    row 4  |  o o o d d d o o o o o o
1982    row 5  |  o o o d d d o o o o o o
1983           -------------------
1984 .ve
1985 
1986    Thus, any entries in the d locations are stored in the d (diagonal)
1987    submatrix, and any entries in the o locations are stored in the
1988    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1989    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1990 
1991    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1992    plus the diagonal part of the d matrix,
1993    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1994    In general, for PDE problems in which most nonzeros are near the diagonal,
1995    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1996    or you will get TERRIBLE performance; see the users' manual chapter on
1997    matrices.
1998 
1999    Level: intermediate
2000 
2001 .keywords: matrix, block, aij, compressed row, sparse, parallel
2002 
2003 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2004 @*/
2005 
2006 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2007 {
2008   PetscErrorCode ierr;
2009   PetscMPIInt    size;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2013   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2014   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2015   if (size > 1) {
2016     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2017     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2018   } else {
2019     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2020     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
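
/*
   Illustrative usage sketch (not part of the original file): the convenience
   routine above bundles the MatCreate()/MatSetType()/preallocation sequence.

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,M,M,
                            3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/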
2024 
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2028 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2029 {
2030   Mat            mat;
2031   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2032   PetscErrorCode ierr;
2033   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2034   PetscScalar    *array;
2035 
2036   PetscFunctionBegin;
2037   *newmat       = 0;
2038   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2039   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2040   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2041   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2042   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2043   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2044 
2045   mat->factor       = matin->factor;
2046   mat->preallocated = PETSC_TRUE;
2047   mat->assembled    = PETSC_TRUE;
2048   mat->insertmode   = NOT_SET_VALUES;
2049 
2050   a = (Mat_MPISBAIJ*)mat->data;
2051   a->bs2   = oldmat->bs2;
2052   a->mbs   = oldmat->mbs;
2053   a->nbs   = oldmat->nbs;
2054   a->Mbs   = oldmat->Mbs;
2055   a->Nbs   = oldmat->Nbs;
2056 
2057 
2058   a->size         = oldmat->size;
2059   a->rank         = oldmat->rank;
2060   a->donotstash   = oldmat->donotstash;
2061   a->roworiented  = oldmat->roworiented;
2062   a->rowindices   = 0;
2063   a->rowvalues    = 0;
2064   a->getrowactive = PETSC_FALSE;
2065   a->barray       = 0;
2066   a->rstartbs    = oldmat->rstartbs;
2067   a->rendbs      = oldmat->rendbs;
2068   a->cstartbs    = oldmat->cstartbs;
2069   a->cendbs      = oldmat->cendbs;
2070 
2071   /* hash table stuff */
2072   a->ht           = 0;
2073   a->hd           = 0;
2074   a->ht_size      = 0;
2075   a->ht_flag      = oldmat->ht_flag;
2076   a->ht_fact      = oldmat->ht_fact;
2077   a->ht_total_ct  = 0;
2078   a->ht_insert_ct = 0;
2079 
2080   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2081   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2083   if (oldmat->colmap) {
2084 #if defined (PETSC_USE_CTABLE)
2085     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2086 #else
2087     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2088     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2089     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2090 #endif
2091   } else a->colmap = 0;
2092 
2093   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2094     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2095     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2096     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2097   } else a->garray = 0;
2098 
2099   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2100   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2101   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2102   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2103 
2104   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2105   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2106   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2107   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2108 
2109   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2110   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2111   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2113   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2114   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2115   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2116   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2122 
2123   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2124   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2125   a->sMvctx = oldmat->sMvctx;
2126   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2127 
2128   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2130   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2132   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2133   *newmat = mat;
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #include "petscsys.h"
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatLoad_MPISBAIJ"
2141 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2142 {
2143   Mat            A;
2144   PetscErrorCode ierr;
2145   PetscInt       i,nz,j,rstart,rend;
2146   PetscScalar    *vals,*buf;
2147   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2148   MPI_Status     status;
2149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2150   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2151   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2152   PetscInt       bs=1,Mbs,mbs,extra_rows;
2153   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2154   PetscInt       dcount,kmax,k,nzcount,tmp;
2155   int            fd;
2156 
2157   PetscFunctionBegin;
2158   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2159     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2160   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2161 
2162   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2163   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2164   if (!rank) {
2165     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2166     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2167     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2168     if (header[3] < 0) {
2169       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2170     }
2171   }
2172 
2173   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2174   M = header[1]; N = header[2];
2175 
2176   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2177 
2178   /*
2179      This code adds extra rows to make sure the number of rows is
2180      divisible by the blocksize
2181   */
2182   Mbs        = M/bs;
2183   extra_rows = bs - M + bs*(Mbs);
2184   if (extra_rows == bs) extra_rows = 0;
2185   else                  Mbs++;
2186   if (extra_rows && !rank) {
2187     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2188   }
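  /*
     Worked example of the padding arithmetic above: for M = 10, bs = 4 we get
     Mbs = 2 and extra_rows = 4 - 10 + 8 = 2, so two rows of the identity are
     appended and Mbs becomes 3; for M = 8, bs = 4, extra_rows evaluates to 4
     and is reset to 0, so no padding is added.
  */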
2189 
2190   /* determine ownership of all rows */
2191   mbs        = Mbs/size + ((Mbs % size) > rank);
2192   m          = mbs*bs;
2193   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2194   browners   = rowners + size + 1;
2195   mmbs       = PetscMPIIntCast(mbs);
2196   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2197   rowners[0] = 0;
2198   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2199   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2200   rstart = rowners[rank];
2201   rend   = rowners[rank+1];
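  /*
     Worked example of the distribution above: Mbs = 10 block rows on size = 4
     processes gives mbs = 3,3,2,2 for ranks 0..3, since the first Mbs % size
     ranks each receive one extra block row.
  */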
2202 
2203   /* distribute row lengths to all processors */
2204   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2205   if (!rank) {
2206     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2207     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2208     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2209     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2210     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2211     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2212     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2213   } else {
2214     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2215   }
2216 
2217   if (!rank) {   /* procs[0] */
2218     /* calculate the number of nonzeros on each processor */
2219     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2220     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2221     for (i=0; i<size; i++) {
2222       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2223         procsnz[i] += rowlengths[j];
2224       }
2225     }
2226     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2227 
2228     /* determine max buffer needed and allocate it */
2229     maxnz = 0;
2230     for (i=0; i<size; i++) {
2231       maxnz = PetscMax(maxnz,procsnz[i]);
2232     }
2233     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2234 
2235     /* read in my part of the matrix column indices  */
2236     nz     = procsnz[0];
2237     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2238     mycols = ibuf;
2239     if (size == 1)  nz -= extra_rows;
2240     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2241     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2242 
2243     /* read in the other processors' parts (except the last one's) and ship them off */
2244     for (i=1; i<size-1; i++) {
2245       nz   = procsnz[i];
2246       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2247       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2248     }
2249     /* read in the stuff for the last proc */
2250     if (size != 1) {
2251       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2252       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2253       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2254       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2255     }
2256     ierr = PetscFree(cols);CHKERRQ(ierr);
2257   } else {  /* procs[i], i>0 */
2258     /* determine buffer space needed for message */
2259     nz = 0;
2260     for (i=0; i<m; i++) {
2261       nz += locrowlens[i];
2262     }
2263     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2264     mycols = ibuf;
2265     /* receive message of column indices*/
2266     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2267     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2268     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2269   }
2270 
2271   /* loop over local rows, determining number of off diagonal entries */
2272   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2273   odlens   = dlens + (rend-rstart);
2274   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2275   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2276   masked1  = mask    + Mbs;
2277   masked2  = masked1 + Mbs;
2278   rowcount = 0; nzcount = 0;
2279   for (i=0; i<mbs; i++) {
2280     dcount  = 0;
2281     odcount = 0;
2282     for (j=0; j<bs; j++) {
2283       kmax = locrowlens[rowcount];
2284       for (k=0; k<kmax; k++) {
2285         tmp = mycols[nzcount++]/bs; /* block col. index */
2286         if (!mask[tmp]) {
2287           mask[tmp] = 1;
2288           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2289           else masked1[dcount++] = tmp; /* entry in diag portion */
2290         }
2291       }
2292       rowcount++;
2293     }
2294 
2295     dlens[i]  = dcount;  /* d_nzz[i] */
2296     odlens[i] = odcount; /* o_nzz[i] */
2297 
2298     /* zero out the mask elements we set */
2299     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2300     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2301   }
2302 
2303   /* create our matrix */
2304   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2305   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2306   ierr = MatSetType(A,type);CHKERRQ(ierr);
2307   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2308   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2309 
2310   if (!rank) {
2311     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2312     /* read in my part of the matrix numerical values  */
2313     nz = procsnz[0];
2314     vals = buf;
2315     mycols = ibuf;
2316     if (size == 1)  nz -= extra_rows;
2317     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2318     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2319 
2320     /* insert into matrix */
2321     jj      = rstart*bs;
2322     for (i=0; i<m; i++) {
2323       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2324       mycols += locrowlens[i];
2325       vals   += locrowlens[i];
2326       jj++;
2327     }
2328 
2329     /* read in other processors (except the last one) and ship out */
2330     for (i=1; i<size-1; i++) {
2331       nz   = procsnz[i];
2332       vals = buf;
2333       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2334       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2335     }
2336     /* the last proc */
2337     if (size != 1){
2338       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2339       vals = buf;
2340       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2341       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2342       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2343     }
2344     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2345 
2346   } else {
2347     /* receive numeric values */
2348     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2349 
2350     /* receive message of values*/
2351     vals   = buf;
2352     mycols = ibuf;
2353     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2354     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2355     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2356 
2357     /* insert into matrix */
2358     jj      = rstart*bs;
2359     for (i=0; i<m; i++) {
2360       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2361       mycols += locrowlens[i];
2362       vals   += locrowlens[i];
2363       jj++;
2364     }
2365   }
2366 
2367   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2368   ierr = PetscFree(buf);CHKERRQ(ierr);
2369   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2370   ierr = PetscFree(rowners);CHKERRQ(ierr);
2371   ierr = PetscFree(dlens);CHKERRQ(ierr);
2372   ierr = PetscFree(mask);CHKERRQ(ierr);
2373   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2374   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   *newmat = A;
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2381 /*XXXXX@
2382    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2383 
2384    Input Parameters:
2385 .  mat  - the matrix
2386 .  fact - factor
2387 
2388    Collective on Mat
2389 
2390    Level: advanced
2391 
2392   Notes:
2393    This can also be set by the command line option: -mat_use_hash_table fact
2394 
2395 .keywords: matrix, hashtable, factor, HT
2396 
2397 .seealso: MatSetOption()
2398 @XXXXX*/
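
/*
   Illustrative usage sketch (not part of the original file): the factor is
   supplied together with the option that enables the table, e.g.

.vb
   mpiexec -n 4 ./app -mat_type mpisbaij -mat_use_hash_table 1.5
.ve
*/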
2399 
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2403 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2404 {
2405   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2406   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2407   PetscReal      atmp;
2408   PetscReal      *work,*svalues,*rvalues;
2409   PetscErrorCode ierr;
2410   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2411   PetscMPIInt    rank,size;
2412   PetscInt       *rowners_bs,dest,count,source;
2413   PetscScalar    *va;
2414   MatScalar      *ba;
2415   MPI_Status     stat;
2416 
2417   PetscFunctionBegin;
2418   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2419   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2420   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2421 
2422   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2423   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2424 
2425   bs   = A->rmap->bs;
2426   mbs  = a->mbs;
2427   Mbs  = a->Mbs;
2428   ba   = b->a;
2429   bi   = b->i;
2430   bj   = b->j;
2431 
2432   /* find ownerships */
2433   rowners_bs = A->rmap->range;
2434 
2435   /* each proc creates an array to be distributed */
2436   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2437   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2438 
2439   /* row_max for B */
2440   if (rank != size-1){
2441     for (i=0; i<mbs; i++) {
2442       ncols = bi[1] - bi[0]; bi++;
2443       brow  = bs*i;
2444       for (j=0; j<ncols; j++){
2445         bcol = bs*(*bj);
2446         for (kcol=0; kcol<bs; kcol++){
2447           col = bcol + kcol;                 /* local col index */
2448           col += rowners_bs[rank+1];      /* global col index */
2449           for (krow=0; krow<bs; krow++){
2450             atmp = PetscAbsScalar(*ba); ba++;
2451             row = brow + krow;    /* local row index */
2452             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2453             if (work[col] < atmp) work[col] = atmp;
2454           }
2455         }
2456         bj++;
2457       }
2458     }
2459 
2460     /* send values to its owners */
2461     for (dest=rank+1; dest<size; dest++){
2462       svalues = work + rowners_bs[dest];
2463       count   = rowners_bs[dest+1]-rowners_bs[dest];
2464       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2465     }
2466   }
2467 
2468   /* receive values */
2469   if (rank){
2470     rvalues = work;
2471     count   = rowners_bs[rank+1]-rowners_bs[rank];
2472     for (source=0; source<rank; source++){
2473       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2474       /* process values */
2475       for (i=0; i<count; i++){
2476         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2477       }
2478     }
2479   }
2480 
2481   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2482   ierr = PetscFree(work);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatSOR_MPISBAIJ"
2488 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2489 {
2490   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2491   PetscErrorCode ierr;
2492   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2493   PetscScalar    *x,*b,*ptr,*from;
2494   Vec            bb1;
2495 
2496   PetscFunctionBegin;
2497   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2498   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2499 
2500   if (flag == SOR_APPLY_UPPER) {
2501     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2502     PetscFunctionReturn(0);
2503   }
2504 
2505   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2506     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2508       its--;
2509     }
2510 
2511     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2512     while (its--){
2513 
2514       /* lower triangular part: slvec0b = - B^T*xx */
2515       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2516 
2517       /* copy xx into slvec0a */
2518       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2519       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2520       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2521       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2522 
2523       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2524 
2525       /* copy bb into slvec1a */
2526       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2527       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2528       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2529       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2530 
2531       /* set slvec1b = 0 */
2532       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2533 
2534       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2535       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2536       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2537       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2538 
2539       /* upper triangular part: bb1 = bb1 - B*x */
2540       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2541 
2542       /* local diagonal sweep */
2543       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2544     }
2545     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2546   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2548   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2549     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2550   } else if (flag & SOR_EISENSTAT) {
2551     Vec               xx1;
2552     PetscTruth        hasop;
2553     const PetscScalar *diag;
2554     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2555     PetscInt          i,n;
2556 
2557     if (!mat->xx1) {
2558       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2559       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2560     }
2561     xx1 = mat->xx1;
2562     bb1 = mat->bb1;
2563 
2564     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2565 
2566     if (!mat->diag) {
2567       /* this is wrong for same matrix with new nonzero values */
2568       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2569       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2570     }
2571     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2572 
2573     if (hasop) {
2574       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2575       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2576     } else {
2577       /*
2578           These two lines are replaced by code that may be a bit faster for a good compiler
2579       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2580       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2581       */
2582       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2583       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2584       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2585       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2586       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2587       if (omega == 1.0) {
2588 	for (i=0; i<n; i++) {
2589 	  sl[i] = b[i] - diag[i]*x[i];
2590 	}
2591         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2592       } else {
2593 	for (i=0; i<n; i++) {
2594 	  sl[i] = b[i] + scale*diag[i]*x[i];
2595 	}
2596         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2597       }
2598       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2599       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2600       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2601       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2602     }
2603 
2604     /* multiply off-diagonal portion of matrix */
2605     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2606     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2607     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2608     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2609     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2610     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2611     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2612     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2613     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2614     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2615 
2616     /* local sweep */
2617     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2618     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2619   } else {
2620     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2621   }
2622   PetscFunctionReturn(0);
2623 }
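
/*
   Summary of the communication scheme in the symmetric sweep above (a reading
   of the code, not a specification): slvec0 packs [xx ; B^T*xx] and is
   negated, slvec1 packs [bb ; 0], and a single forward scatter-add of slvec0
   into slvec1 then delivers both the corrected local right-hand side and the
   negated ghost values of xx; the following (*mat->B->ops->multadd)() turns
   these into bb1 = bb minus the off-diagonal couplings B*xx and B^T*xx, after
   which the sweep reduces to a purely local SOR_SYMMETRIC_SWEEP on mat->A.
*/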
2624 
2625 #undef __FUNCT__
2626 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2627 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2628 {
2629   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2630   PetscErrorCode ierr;
2631   Vec            lvec1,bb1;
2632 
2633   PetscFunctionBegin;
2634   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2635   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2636 
2637   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2638     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2639       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2640       its--;
2641     }
2642 
2643     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2644     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2645     while (its--){
2646       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2647 
2648       /* lower triangular part: bb1 = bb - B^T*xx */
2649       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2650       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2651 
2652       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2653       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2654       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2655 
2656       /* upper triangular part: bb1 = bb1 - B*x */
2657       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2658       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2659 
2660       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2661 
2662       /* diagonal sweep */
2663       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2664     }
2665     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2666     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2667   } else {
2668     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2669   }
2670   PetscFunctionReturn(0);
2671 }
2672 
2673