xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision eeffb40d691afbdd57a8091619e7ddd44ac5fdca)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define CHUNKSIZE  10
55 
56 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
57 { \
58  \
59     brow = row/bs;  \
60     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
61     rmax = aimax[brow]; nrow = ailen[brow]; \
62       bcol = col/bs; \
63       ridx = row % bs; cidx = col % bs; \
64       low = 0; high = nrow; \
65       while (high-low > 3) { \
66         t = (low+high)/2; \
67         if (rp[t] > bcol) high = t; \
68         else              low  = t; \
69       } \
70       for (_i=low; _i<high; _i++) { \
71         if (rp[_i] > bcol) break; \
72         if (rp[_i] == bcol) { \
73           bap  = ap +  bs2*_i + bs*cidx + ridx; \
74           if (addv == ADD_VALUES) *bap += value;  \
75           else                    *bap  = value;  \
76           goto a_noinsert; \
77         } \
78       } \
79       if (a->nonew == 1) goto a_noinsert; \
80       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
81       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
82       N = nrow++ - 1;  \
83       /* shift up all the later entries in this row */ \
84       for (ii=N; ii>=_i; ii--) { \
85         rp[ii+1] = rp[ii]; \
86         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
87       } \
88       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
89       rp[_i]                      = bcol;  \
90       ap[bs2*_i + bs*cidx + ridx] = value;  \
91       a_noinsert:; \
92     ailen[brow] = nrow; \
93 }
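/*
   Notes on MatSetValues_SeqSBAIJ_A_Private (the B variant below is identical
   except that it targets the off-diagonal block B):
   - the scalar entry (row,col) is mapped to block coordinates
     (brow,bcol) = (row/bs,col/bs) and intra-block offsets (ridx,cidx);
   - a short binary search narrows the block-column range, then a linear scan
     locates bcol within the block row;
   - if the block is absent and new nonzeros are allowed, all later blocks in
     the row are shifted up one slot and a zeroed bs x bs block is inserted.
   Worked example (hedged, for illustration only): with bs=2 the global entry
   (3,5) lands in block (1,2) with ridx=1, cidx=1, so the value is stored at
   ap[bs2*_i + bs*1 + 1] once _i is the position of block column 2.
*/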
94 
95 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
96 { \
97     brow = row/bs;  \
98     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
99     rmax = bimax[brow]; nrow = bilen[brow]; \
100       bcol = col/bs; \
101       ridx = row % bs; cidx = col % bs; \
102       low = 0; high = nrow; \
103       while (high-low > 3) { \
104         t = (low+high)/2; \
105         if (rp[t] > bcol) high = t; \
106         else              low  = t; \
107       } \
108       for (_i=low; _i<high; _i++) { \
109         if (rp[_i] > bcol) break; \
110         if (rp[_i] == bcol) { \
111           bap  = ap +  bs2*_i + bs*cidx + ridx; \
112           if (addv == ADD_VALUES) *bap += value;  \
113           else                    *bap  = value;  \
114           goto b_noinsert; \
115         } \
116       } \
117       if (b->nonew == 1) goto b_noinsert; \
118       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
119       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
120       N = nrow++ - 1;  \
121       /* shift up all the later entries in this row */ \
122       for (ii=N; ii>=_i; ii--) { \
123         rp[ii+1] = rp[ii]; \
124         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
125       } \
126       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
127       rp[_i]                      = bcol;  \
128       ap[bs2*_i + bs*cidx + ridx] = value;  \
129       b_noinsert:; \
130     bilen[brow] = nrow; \
131 }
132 
133 /* Only add/insert a(i,j) with i<=j (blocks).
134    Any a(i,j) with i>j input by the user is ignored.
135 */
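/*
   For example (a hedged illustration): with bs=1, a call
     MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES)
   with i=3, j=1 targets the lower triangle; it is silently dropped when
   ignore_ltriangular is set (-mat_ignore_lower_triangular) and raises
   PETSC_ERR_USER otherwise.
*/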
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetValues_MPISBAIJ"
138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
139 {
140   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
141   MatScalar      value;
142   PetscTruth     roworiented = baij->roworiented;
143   PetscErrorCode ierr;
144   PetscInt       i,j,row,col;
145   PetscInt       rstart_orig=mat->rmap->rstart;
146   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
147   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
148 
149   /* Some variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private macros above */
150   Mat            A = baij->A;
151   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
152   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
153   MatScalar      *aa=a->a;
154 
155   Mat            B = baij->B;
156   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
157   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
158   MatScalar     *ba=b->a;
159 
160   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
161   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
162   MatScalar     *ap,*bap;
163 
164   /* for stash */
165   PetscInt      n_loc, *in_loc = PETSC_NULL;
166   MatScalar     *v_loc = PETSC_NULL;
167 
168   PetscFunctionBegin;
169 
170   if (!baij->donotstash){
171     if (n > baij->n_loc) {
172       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
176       baij->n_loc = n;
177     }
178     in_loc = baij->in_loc;
179     v_loc  = baij->v_loc;
180   }
181 
182   for (i=0; i<m; i++) {
183     if (im[i] < 0) continue;
184 #if defined(PETSC_USE_DEBUG)
185     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
186 #endif
187     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
188       row = im[i] - rstart_orig;              /* local row index */
189       for (j=0; j<n; j++) {
190         if (im[i]/bs > in[j]/bs){
191           if (a->ignore_ltriangular){
192             continue;    /* ignore lower triangular blocks */
193           } else {
194             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
195           }
196         }
197         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
198           col = in[j] - cstart_orig;          /* local col index */
199           brow = row/bs; bcol = col/bs;
200           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
201           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
202           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
203           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
204         } else if (in[j] < 0) continue;
205 #if defined(PETSC_USE_DEBUG)
206         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
207 #endif
208         else {  /* off-diag entry (B) */
209           if (mat->was_assembled) {
210             if (!baij->colmap) {
211               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
212             }
213 #if defined (PETSC_USE_CTABLE)
214             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
215             col  = col - 1;
216 #else
217             col = baij->colmap[in[j]/bs] - 1;
218 #endif
219             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
220               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
221               col =  in[j];
222               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
223               B = baij->B;
224               b = (Mat_SeqBAIJ*)(B)->data;
225               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226               ba=b->a;
227             } else col += in[j]%bs;
228           } else col = in[j];
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (!baij->donotstash) {
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
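/*
   Off-process entries are not communicated here; they are buffered in
   mat->stash and exchanged during assembly.  A minimal calling sequence
   (hedged sketch; variable names are illustrative):

     PetscInt    row = 0, col = 5;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/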
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscTruth      roworiented = baij->roworiented;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         /* If only a single block is passed in this orientation, it can be used in place and no copy is required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++  = *value++;
300             }
301           }
302           barray -=bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend){
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         }
309         else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined (PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined (PETSC_USE_CTABLE)
330 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col =  in[j];
338             }
339           }
340           else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
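/*
   Usage sketch for blocked insertion (hedged illustration; the indices are
   BLOCK indices, so with bs=2 block (0,1) covers scalar rows 0..1 and
   scalar columns 2..3):

     PetscInt    ib = 0, jb = 1;
     PetscScalar vblock[4] = {1.0,2.0,3.0,4.0};  (a dense bs*bs block)
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/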
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend){
376           col = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined (PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data --;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else {
396       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
397     }
398   }
399  PetscFunctionReturn(0);
400 }
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "MatNorm_MPISBAIJ"
404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
405 {
406   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
407   PetscErrorCode ierr;
408   PetscReal      sum[2],*lnorm2;
409 
410   PetscFunctionBegin;
411   if (baij->size == 1) {
412     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
413   } else {
414     if (type == NORM_FROBENIUS) {
415       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
416       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
418       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
420       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
421       *norm = sqrt(sum[0] + 2*sum[1]); /* B entries count twice: each also represents its transpose below the diagonal */
422       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
423     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
424       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
425       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
426       PetscReal    *rsum,*rsum2,vabs;
427       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
428       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
429       MatScalar    *v;
430 
431       ierr  = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
432       rsum2 = rsum + mat->cmap->N;
433       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++){
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++){
442             for (row=0; row<bs; row++){
443               vabs = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++){
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++){
459             for (row=0; row<bs; row++){
460               vabs = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap->N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree(rsum);CHKERRQ(ierr);
473     } else {
474       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
475     }
476   }
477   PetscFunctionReturn(0);
478 }
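/*
   For a symmetric matrix the 1-norm (max column sum) equals the
   infinity-norm (max row sum), so both are handled above by one column-sum
   pass: each stored entry contributes to its own column, and entries outside
   the diagonal block also contribute to the column of their implicit
   transpose.
*/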
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
482 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
483 {
484   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
485   PetscErrorCode ierr;
486   PetscInt       nstash,reallocs;
487   InsertMode     addv;
488 
489   PetscFunctionBegin;
490   if (baij->donotstash) {
491     PetscFunctionReturn(0);
492   }
493 
494   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mixture */
495   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
496   if (addv == (ADD_VALUES|INSERT_VALUES)) {
497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
498   }
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscTruth     other_disassembled;
520   PetscMPIInt    n;
521   PetscTruth     r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527 
528   if (!baij->donotstash) {
529     while (1) {
530       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
531       if (!flg) break;
532 
533       for (i=0; i<n;) {
534         /* Now identify the consecutive vals belonging to the same row */
535         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
536         if (j < n) ncols = j-i;
537         else       ncols = n-i;
538         /* Now assemble all these values with a single function call */
539         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
540         i = j;
541       }
542     }
543     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
544     /* Now process the block-stash. Since the values are stashed column-oriented,
545        temporarily clear the row-oriented flags, and restore the original flags
546        after MatSetValuesBlocked() */
547     r1 = baij->roworiented;
548     r2 = a->roworiented;
549     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
550     baij->roworiented = PETSC_FALSE;
551     a->roworiented    = PETSC_FALSE;
552     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
553     while (1) {
554       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
555       if (!flg) break;
556 
557       for (i=0; i<n;) {
558         /* Now identify the consecutive vals belonging to the same row */
559         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
570   }
571 
572   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
574 
575   /* Determine if any processor has disassembled; if so we must
576      also disassemble ourselves, so that we may reassemble. */
577   /*
578      If the nonzero structure of submatrix B cannot change, then we know that
579      no processor has disassembled, so we can skip this step.
580   */
581   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587 
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
590   }
591   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
596   baij->rowvalues = 0;
597 
598   PetscFunctionReturn(0);
599 }
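/*
   Assembly proceeds in two phases: the scalar stash (entries set with
   MatSetValues() on other processes) is drained first, then the block stash
   (MatSetValuesBlocked() entries).  Consecutive stashed values belonging to
   the same row are grouped so each row is applied with a single call.
*/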
600 
601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
602 #undef __FUNCT__
603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
605 {
606   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
607   PetscErrorCode    ierr;
608   PetscInt          bs = mat->rmap->bs;
609   PetscMPIInt       size = baij->size,rank = baij->rank;
610   PetscTruth        iascii,isdraw;
611   PetscViewer       sviewer;
612   PetscViewerFormat format;
613 
614   PetscFunctionBegin;
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
617   if (iascii) {
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
620       MatInfo info;
621       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
622       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
623       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
624               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
625               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
632       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
638       PetscFunctionReturn(0);
639     }
640   }
641 
642   if (isdraw) {
643     PetscDraw  draw;
644     PetscTruth isnull;
645     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
646     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
647   }
648 
649   if (size == 1) {
650     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
651     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
652   } else {
653     /* assemble the entire matrix onto first processor. */
654     Mat          A;
655     Mat_SeqSBAIJ *Aloc;
656     Mat_SeqBAIJ  *Bloc;
657     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
658     MatScalar    *a;
659 
660     /* Should this be the same type as mat? */
661     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
662     if (!rank) {
663       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
664     } else {
665       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
666     }
667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
668     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to call to draw the matrix since the graphics waits are
707        synchronized across all processors that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     ierr = MatDestroy(A);CHKERRQ(ierr);
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatView_MPISBAIJ"
722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
723 {
724   PetscErrorCode ierr;
725   PetscTruth     iascii,isdraw,issocket,isbinary;
726 
727   PetscFunctionBegin;
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
732   if (iascii || isdraw || issocket || isbinary) {
733     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPISBAIJ"
742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
743 {
744   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
757 #else
758   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
761   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
762   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
763   if (baij->slvec0) {
764     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
765     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
766   }
767   if (baij->slvec1) {
768     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
770     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
771   }
772   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
773   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
774   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
775   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
776   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
777   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
778   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
779 #if defined(PETSC_USE_MAT_SINGLE)
780   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
781 #endif
782   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
785   ierr = PetscFree(baij);CHKERRQ(ierr);
786 
787   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_MPISBAIJ"
797 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
798 {
799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
800   PetscErrorCode ierr;
801   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
802   PetscScalar    *x,*from;
803 
804   PetscFunctionBegin;
805   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
806   if (nt != A->cmap->n) {
807     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
808   }
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
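/*
   Sketch of the single-scatter symmetric multiply above (only the upper
   triangle is stored across processes):
     slvec1a = A * x,  slvec1b = 0       local diagonal block
     slvec0  = [ x ; B^T x ]             B^T x is the subdiagonal action
     scatter-add slvec0 into slvec1:     slvec1a picks up remote B^T x terms,
                                         slvec1b receives the ghost values of x
     yy      = B * slvec1b + slvec1a     superdiagonal part completes y
   One forward scatter therefore communicates both x and B^T x.
*/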
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
834 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt;
839 
840   PetscFunctionBegin;
841   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
842   if (nt != A->cmap->n) {
843     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844   }
845   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
846   if (nt != A->rmap->n) { /* yy must match the LOCAL row size; rmap->N is the global size */
847     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
848   }
849 
850   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
851   /* do diagonal part */
852   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
853   /* do superdiagonal part */
854   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
855   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
856   /* do subdiagonal part */
857   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
858   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
859   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
860 
861   PetscFunctionReturn(0);
862 }
863 
864 #undef __FUNCT__
865 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
866 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
867 {
868   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
869   PetscErrorCode ierr;
870   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
871   PetscScalar    *x,*from,zero=0.0;
872 
873   PetscFunctionBegin;
874   /*
875   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
876   PetscSynchronizedFlush(((PetscObject)A)->comm);
877   */
878   /* diagonal part */
879   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
880   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
881 
882   /* subdiagonal part */
883   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
884 
885   /* copy x into the vec slvec0 */
886   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
887   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
889   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
890 
891   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
892   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
893   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894 
895   /* superdiagonal part */
896   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
897 
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
903 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
904 {
905   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
906   PetscErrorCode ierr;
907 
908   PetscFunctionBegin;
909   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
910   /* do diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
912   /* do superdiagonal part */
913   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
914   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
915 
916   /* do subdiagonal part */
917   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
918   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
919   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
920 
921   PetscFunctionReturn(0);
922 }
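/*
   The _2comm variants use the conventional two-scatter scheme instead: a
   forward scatter brings in the ghost values of x needed by B, and a reverse
   scatter-add returns the B^T x contributions to their owning processes.
   Compare with the single-scatter slvec0/slvec1 scheme in MatMult_MPISBAIJ
   above, which folds both communications into one.
*/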
923 
924 /*
925   This only works correctly for square matrices where the subblock A->A is the
926    diagonal block
927 */
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
930 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
931 {
932   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
937   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatScale_MPISBAIJ"
943 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
944 {
945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
950   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatGetRow_MPISBAIJ"
956 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
957 {
958   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
959   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
960   PetscErrorCode ierr;
961   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
962   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
963   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
964 
965   PetscFunctionBegin;
966   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
967   mat->getrowactive = PETSC_TRUE;
968 
969   if (!mat->rowvalues && (idx || v)) {
970     /*
971         allocate enough space to hold information from the longest row.
972     */
973     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
974     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
975     PetscInt     max = 1,mbs = mat->mbs,tmp;
976     for (i=0; i<mbs; i++) {
977       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
978       if (max < tmp) { max = tmp; }
979     }
980     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
981     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
982   }
983 
984   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
985   lrow = row - brstart;  /* local row index */
986 
987   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
988   if (!v)   {pvA = 0; pvB = 0;}
989   if (!idx) {pcA = 0; if (!v) pcB = 0;}
990   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
991   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
992   nztot = nzA + nzB;
993 
994   cmap  = mat->garray;
995   if (v  || idx) {
996     if (nztot) {
997       /* Sort by increasing column numbers, assuming A and B already sorted */
998       PetscInt imark = -1;
999       if (v) {
1000         *v = v_p = mat->rowvalues;
1001         for (i=0; i<nzB; i++) {
1002           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1003           else break;
1004         }
1005         imark = i;
1006         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1007         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1008       }
1009       if (idx) {
1010         *idx = idx_p = mat->rowindices;
1011         if (imark > -1) {
1012           for (i=0; i<imark; i++) {
1013             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1014           }
1015         } else {
1016           for (i=0; i<nzB; i++) {
1017             if (cmap[cworkB[i]/bs] < cstart)
1018               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1019             else break;
1020           }
1021           imark = i;
1022         }
1023         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1024         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1025       }
1026     } else {
1027       if (idx) *idx = 0;
1028       if (v)   *v   = 0;
1029     }
1030   }
1031   *nz = nztot;
1032   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1033   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
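/*
   MatGetRow above merges one row of A (the local diagonal block, upper
   triangle) with the matching row of B (the off-diagonal block) into a
   single array sorted by global column: B entries whose global block column
   precedes the local range come first, then all A entries, then the
   remaining B entries.
*/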
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1039 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1040 {
1041   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1042 
1043   PetscFunctionBegin;
1044   if (!baij->getrowactive) {
1045     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1046   }
1047   baij->getrowactive = PETSC_FALSE;
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1053 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1057 
1058   PetscFunctionBegin;
1059   aA->getrow_utriangular = PETSC_TRUE;
1060   PetscFunctionReturn(0);
1061 }
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1064 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1065 {
1066   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1067   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1068 
1069   PetscFunctionBegin;
1070   aA->getrow_utriangular = PETSC_FALSE;
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1076 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1077 {
1078   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1079   PetscErrorCode ierr;
1080 
1081   PetscFunctionBegin;
1082   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1083   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1089 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1090 {
1091   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1096   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1102 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1109   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1115 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1118   Mat            A = a->A,B = a->B;
1119   PetscErrorCode ierr;
1120   PetscReal      isend[5],irecv[5];
1121 
1122   PetscFunctionBegin;
1123   info->block_size     = (PetscReal)matin->rmap->bs;
1124   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1125   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1126   isend[3] = info->memory;  isend[4] = info->mallocs;
1127   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1128   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1129   isend[3] += info->memory;  isend[4] += info->mallocs;
1130   if (flag == MAT_LOCAL) {
1131     info->nz_used      = isend[0];
1132     info->nz_allocated = isend[1];
1133     info->nz_unneeded  = isend[2];
1134     info->memory       = isend[3];
1135     info->mallocs      = isend[4];
1136   } else if (flag == MAT_GLOBAL_MAX) {
1137     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1138     info->nz_used      = irecv[0];
1139     info->nz_allocated = irecv[1];
1140     info->nz_unneeded  = irecv[2];
1141     info->memory       = irecv[3];
1142     info->mallocs      = irecv[4];
1143   } else if (flag == MAT_GLOBAL_SUM) {
1144     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   } else {
1151     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1152   }
1153   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1154   info->fill_ratio_needed = 0;
1155   info->factor_mallocs    = 0;
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1161 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1162 {
1163   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1164   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   switch (op) {
1169   case MAT_NEW_NONZERO_LOCATIONS:
1170   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1171   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1172   case MAT_KEEP_NONZERO_PATTERN:
1173   case MAT_NEW_NONZERO_LOCATION_ERR:
1174     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1175     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1176     break;
1177   case MAT_ROW_ORIENTED:
1178     a->roworiented = flg;
1179     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1180     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1181     break;
1182   case MAT_NEW_DIAGONALS:
1183     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1184     break;
1185   case MAT_IGNORE_OFF_PROC_ENTRIES:
1186     a->donotstash = flg;
1187     break;
1188   case MAT_USE_HASH_TABLE:
1189     a->ht_flag = flg;
1190     break;
1191   case MAT_HERMITIAN:
1192     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1193     break;
1194   case MAT_SYMMETRIC:
1195     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1196     break;
1197   case MAT_STRUCTURALLY_SYMMETRIC:
1198     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1199     break;
1200   case MAT_SYMMETRY_ETERNAL:
1201     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1202     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1203     break;
1204   case MAT_IGNORE_LOWER_TRIANGULAR:
1205     aA->ignore_ltriangular = flg;
1206     break;
1207   case MAT_ERROR_LOWER_TRIANGULAR:
1208     aA->ignore_ltriangular = flg;
1209     break;
1210   case MAT_GETROW_UPPERTRIANGULAR:
1211     aA->getrow_utriangular = flg;
1212     break;
1213   default:
1214     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1215   }
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1221 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1222 {
1223   PetscErrorCode ierr;
1224   PetscFunctionBegin;
1225   if (reuse == MAT_INITIAL_MATRIX || *B != A) { /* the matrix is symmetric, so its transpose is a copy */
1226     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1227   }
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1233 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1234 {
1235   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1236   Mat            a=baij->A, b=baij->B;
1237   PetscErrorCode ierr;
1238   PetscInt       nv,m,n;
1239   PetscTruth     flg;
1240 
1241   PetscFunctionBegin;
1242   if (ll != rr){
1243     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1244     if (!flg)
1245       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For the symmetric format, the left and right scaling vectors must be the same");
1246   }
1247   if (!ll) PetscFunctionReturn(0);
1248 
1249   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1250   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For the symmetric format, the local sizes %D and %D must be the same",m,n);
1251 
1252   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1253   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"The scaling vector has a non-conforming local size");
1254 
1255   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1256 
1257   /* left diagonalscale the off-diagonal part */
1258   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1259 
1260   /* scale the diagonal part */
1261   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1262 
1263   /* right diagonalscale the off-diagonal part */
1264   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
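/*
   Symmetric diagonal scaling computes D*A*D with D = diag(ll) = diag(rr):
   the diagonal block is scaled on both sides locally, while the off-diagonal
   block B gets its left factor locally and its right factor from the
   scattered ghost values of rr.
*/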
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1271 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1272 {
1273   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatEqual_MPISBAIJ"
1285 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1286 {
1287   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1288   Mat            a,b,c,d;
1289   PetscTruth     flg;
1290   PetscErrorCode ierr;
1291 
1292   PetscFunctionBegin;
1293   a = matA->A; b = matA->B;
1294   c = matB->A; d = matB->B;
1295 
1296   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1297   if (flg) {
1298     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1299   }
1300   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatCopy_MPISBAIJ"
1306 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1307 {
1308   PetscErrorCode ierr;
1309   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1310   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1311 
1312   PetscFunctionBegin;
1313   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1314   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1315     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1316     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1317     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1318   } else {
1319     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1320     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1327 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1328 {
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #include "petscblaslapack.h"
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1339 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1340 {
1341   PetscErrorCode ierr;
1342   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1343   PetscBLASInt   bnz,one=1;
1344   Mat_SeqSBAIJ   *xa,*ya;
1345   Mat_SeqBAIJ    *xb,*yb;
1346 
1347   PetscFunctionBegin;
1348   if (str == SAME_NONZERO_PATTERN) {
1349     PetscScalar alpha = a;
1350     xa = (Mat_SeqSBAIJ *)xx->A->data;
1351     ya = (Mat_SeqSBAIJ *)yy->A->data;
1352     bnz = PetscBLASIntCast(xa->nz);
1353     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1354     xb = (Mat_SeqBAIJ *)xx->B->data;
1355     yb = (Mat_SeqBAIJ *)yy->B->data;
1356     bnz = PetscBLASIntCast(xb->nz);
1357     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1358   } else {
1359     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1360     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1361     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
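/*
   When X and Y share a nonzero pattern, Y += a*X reduces to two dense BLAS
   axpy calls on the raw value arrays of the diagonal (A) and off-diagonal
   (B) blocks; otherwise the entry-by-entry MatAXPY_Basic() fallback is used,
   bracketed by MatGetRowUpperTriangular() so the rows of X can be traversed.
*/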
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1368 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1369 {
1370   PetscErrorCode ierr;
1371   PetscInt       i;
1372   PetscTruth     flg;
1373 
1374   PetscFunctionBegin;
1375   for (i=0; i<n; i++) {
1376     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1377     if (!flg) {
1378       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1379     }
1380   }
1381   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
1384 
1385 
1386 /* -------------------------------------------------------------------*/
1387 static struct _MatOps MatOps_Values = {
1388        MatSetValues_MPISBAIJ,
1389        MatGetRow_MPISBAIJ,
1390        MatRestoreRow_MPISBAIJ,
1391        MatMult_MPISBAIJ,
1392 /* 4*/ MatMultAdd_MPISBAIJ,
1393        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones: the matrix is symmetric */
1394        MatMultAdd_MPISBAIJ,
1395        0,
1396        0,
1397        0,
1398 /*10*/ 0,
1399        0,
1400        0,
1401        MatSOR_MPISBAIJ,
1402        MatTranspose_MPISBAIJ,
1403 /*15*/ MatGetInfo_MPISBAIJ,
1404        MatEqual_MPISBAIJ,
1405        MatGetDiagonal_MPISBAIJ,
1406        MatDiagonalScale_MPISBAIJ,
1407        MatNorm_MPISBAIJ,
1408 /*20*/ MatAssemblyBegin_MPISBAIJ,
1409        MatAssemblyEnd_MPISBAIJ,
1410        MatSetOption_MPISBAIJ,
1411        MatZeroEntries_MPISBAIJ,
1412 /*24*/ 0,
1413        0,
1414        0,
1415        0,
1416        0,
1417 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1418        0,
1419        0,
1420        0,
1421        0,
1422 /*34*/ MatDuplicate_MPISBAIJ,
1423        0,
1424        0,
1425        0,
1426        0,
1427 /*39*/ MatAXPY_MPISBAIJ,
1428        MatGetSubMatrices_MPISBAIJ,
1429        MatIncreaseOverlap_MPISBAIJ,
1430        MatGetValues_MPISBAIJ,
1431        MatCopy_MPISBAIJ,
1432 /*44*/ 0,
1433        MatScale_MPISBAIJ,
1434        0,
1435        0,
1436        0,
1437 /*49*/ 0,
1438        0,
1439        0,
1440        0,
1441        0,
1442 /*54*/ 0,
1443        0,
1444        MatSetUnfactored_MPISBAIJ,
1445        0,
1446        MatSetValuesBlocked_MPISBAIJ,
1447 /*59*/ 0,
1448        0,
1449        0,
1450        0,
1451        0,
1452 /*64*/ 0,
1453        0,
1454        0,
1455        0,
1456        0,
1457 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1458        0,
1459        0,
1460        0,
1461        0,
1462 /*74*/ 0,
1463        0,
1464        0,
1465        0,
1466        0,
1467 /*79*/ 0,
1468        0,
1469        0,
1470        0,
1471        MatLoad_MPISBAIJ,
1472 /*84*/ 0,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*89*/ 0,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*94*/ 0,
1483        0,
1484        0,
1485        0,
1486        0,
1487 /*99*/ 0,
1488        0,
1489        0,
1490        0,
1491        0,
1492 /*104*/0,
1493        MatRealPart_MPISBAIJ,
1494        MatImaginaryPart_MPISBAIJ,
1495        MatGetRowUpperTriangular_MPISBAIJ,
1496        MatRestoreRowUpperTriangular_MPISBAIJ
1497 };
1498 
1499 
1500 EXTERN_C_BEGIN
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1503 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1504 {
1505   PetscFunctionBegin;
1506   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1507   *iscopy = PETSC_FALSE;
1508   PetscFunctionReturn(0);
1509 }
1510 EXTERN_C_END
1511 
1512 EXTERN_C_BEGIN
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1515 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1516 {
1517   Mat_MPISBAIJ   *b;
1518   PetscErrorCode ierr;
1519   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1520 
1521   PetscFunctionBegin;
1522   if (bs < 0){
1523     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1524       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1525     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1526     bs   = PetscAbs(bs);
1527   }
1528   if ((d_nnz || o_nnz) && newbs != bs) {
1529     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1530   }
1531   bs = newbs;
1532 
1533   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1534   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1535   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1536   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1537 
1538   B->rmap->bs = B->cmap->bs = bs;
1539   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1540   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1541 
1542   if (d_nnz) {
1543     for (i=0; i<B->rmap->n/bs; i++) {
1544       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1545     }
1546   }
1547   if (o_nnz) {
1548     for (i=0; i<B->rmap->n/bs; i++) {
1549       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1550     }
1551   }
1552 
1553   b   = (Mat_MPISBAIJ*)B->data;
1554   mbs = B->rmap->n/bs;
1555   Mbs = B->rmap->N/bs;
1556   if (mbs*bs != B->rmap->n) {
1557     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1558   }
1559 
1560   B->rmap->bs  = bs;
1561   b->bs2 = bs*bs;
1562   b->mbs = mbs;
1563   b->nbs = mbs;
1564   b->Mbs = Mbs;
1565   b->Nbs = Mbs;
1566 
1567   for (i=0; i<=b->size; i++) {
1568     b->rangebs[i] = B->rmap->range[i]/bs;
1569   }
1570   b->rstartbs = B->rmap->rstart/bs;
1571   b->rendbs   = B->rmap->rend/bs;
1572 
1573   b->cstartbs = B->cmap->rstart/bs;
1574   b->cendbs   = B->cmap->rend/bs;
1575 
1576   if (!B->preallocated) {
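    /* b->A stores the diagonal block (upper triangle, SeqSBAIJ); b->B is
       created as a SeqBAIJ over the full global column space and is later
       compacted to the off-diagonal columns actually used (cf. the
       colmap/garray fields). */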
1577     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1578     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1579     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1580     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1581     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1582     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1583     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1584     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1585     /* build cache for off-process entries formed during assembly */
1586     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1587   }
1588 
1589   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1590   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1591   B->preallocated = PETSC_TRUE;
1592   PetscFunctionReturn(0);
1593 }
1594 EXTERN_C_END
1595 
1596 EXTERN_C_BEGIN
1597 #if defined(PETSC_HAVE_MUMPS)
1598 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1599 #endif
1600 #if defined(PETSC_HAVE_SPOOLES)
1601 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1602 #endif
1603 #if defined(PETSC_HAVE_PASTIX)
1604 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1605 #endif
1606 EXTERN_C_END
1607 
1608 /*MC
1609    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1610    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1611 
1612    Options Database Keys:
1613 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1614 
1615   Level: beginner
1616 
1617 .seealso: MatCreateMPISBAIJ
1618 M*/
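
/*
   A minimal usage sketch (illustrative; M is a placeholder and error
   checking is elided):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
      MatSetFromOptions(A);                        run with -mat_type mpisbaij
      MatMPISBAIJSetPreallocation(A,1,3,PETSC_NULL,1,PETSC_NULL);
*/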
1619 
1620 EXTERN_C_BEGIN
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatCreate_MPISBAIJ"
1623 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1624 {
1625   Mat_MPISBAIJ   *b;
1626   PetscErrorCode ierr;
1627   PetscTruth     flg;
1628 
1629   PetscFunctionBegin;
1630 
1631   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1632   B->data = (void*)b;
1633   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1634 
1635   B->ops->destroy    = MatDestroy_MPISBAIJ;
1636   B->ops->view       = MatView_MPISBAIJ;
1637   B->mapping         = 0;
1638   B->assembled       = PETSC_FALSE;
1639 
1640   B->insertmode = NOT_SET_VALUES;
1641   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1642   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1643 
1644   /* build local table of row and column ownerships */
1645   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1646 
1647   /* build cache for off-process entries formed during assembly */
1648   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1649   b->donotstash  = PETSC_FALSE;
1650   b->colmap      = PETSC_NULL;
1651   b->garray      = PETSC_NULL;
1652   b->roworiented = PETSC_TRUE;
1653 
1654   /* stuff used in block assembly */
1655   b->barray       = 0;
1656 
1657   /* stuff used for matrix vector multiply */
1658   b->lvec         = 0;
1659   b->Mvctx        = 0;
1660   b->slvec0       = 0;
1661   b->slvec0b      = 0;
1662   b->slvec1       = 0;
1663   b->slvec1a      = 0;
1664   b->slvec1b      = 0;
1665   b->sMvctx       = 0;
1666 
1667   /* stuff for MatGetRow() */
1668   b->rowindices   = 0;
1669   b->rowvalues    = 0;
1670   b->getrowactive = PETSC_FALSE;
1671 
1672   /* hash table stuff */
1673   b->ht           = 0;
1674   b->hd           = 0;
1675   b->ht_size      = 0;
1676   b->ht_flag      = PETSC_FALSE;
1677   b->ht_fact      = 0;
1678   b->ht_total_ct  = 0;
1679   b->ht_insert_ct = 0;
1680 
1681   b->in_loc       = 0;
1682   b->v_loc        = 0;
1683   b->n_loc        = 0;
1684   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1685     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1686     if (flg) {
1687       PetscReal fact = 1.39;
1688       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1689       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1690       if (fact <= 1.0) fact = 1.39;
1691       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1692       ierr = PetscInfo1(B,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
1693     }
1694   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1695 
1696 #if defined(PETSC_HAVE_PASTIX)
1697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1698 					   "MatGetFactor_mpisbaij_pastix",
1699 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1700 #endif
1701 #if defined(PETSC_HAVE_MUMPS)
1702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1703                                      "MatGetFactor_mpisbaij_mumps",
1704                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1705 #endif
1706 #if defined(PETSC_HAVE_SPOOLES)
1707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1708                                      "MatGetFactor_mpisbaij_spooles",
1709                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1710 #endif
1711   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1712                                      "MatStoreValues_MPISBAIJ",
1713                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1714   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1715                                      "MatRetrieveValues_MPISBAIJ",
1716                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1717   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1718                                      "MatGetDiagonalBlock_MPISBAIJ",
1719                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1720   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1721                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1722                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1723   B->symmetric                  = PETSC_TRUE;
1724   B->structurally_symmetric     = PETSC_TRUE;
1725   B->symmetric_set              = PETSC_TRUE;
1726   B->structurally_symmetric_set = PETSC_TRUE;
1727   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1728   PetscFunctionReturn(0);
1729 }
1730 EXTERN_C_END
1731 
1732 /*MC
1733    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1734 
1735    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1736    and MATMPISBAIJ otherwise.
1737 
1738    Options Database Keys:
1739 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1740 
1741   Level: beginner
1742 
1743 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1744 M*/
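
/*
   A minimal sketch of the dispatch performed by MatCreate_SBAIJ() below:
   the same user code yields MATSEQSBAIJ on one process and MATMPISBAIJ
   otherwise (m, n, M, N are placeholders):

      Mat A;
      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATSBAIJ);
*/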
1745 
1746 EXTERN_C_BEGIN
1747 #undef __FUNCT__
1748 #define __FUNCT__ "MatCreate_SBAIJ"
1749 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1750 {
1751   PetscErrorCode ierr;
1752   PetscMPIInt    size;
1753 
1754   PetscFunctionBegin;
1755   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1756   if (size == 1) {
1757     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1758   } else {
1759     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1760   }
1761   PetscFunctionReturn(0);
1762 }
1763 EXTERN_C_END
1764 
1765 #undef __FUNCT__
1766 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1767 /*@C
1768    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1769    the user should preallocate the matrix storage by setting the parameters
1770    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1771    performance can be increased by more than a factor of 50.
1772 
1773    Collective on Mat
1774 
1775    Input Parameters:
1776 +  A - the matrix
1777 .  bs   - size of block
1778 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1779            submatrix  (same for all local rows)
1780 .  d_nnz - array containing the number of block nonzeros in the various block rows
1781            in the upper triangular and diagonal part of the diagonal portion of the local
1782            submatrix (possibly different for each block row), or PETSC_NULL.  You must leave room
1783            for the diagonal entry even if it is zero.
1784 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1785            submatrix (same for all local rows).
1786 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1787            off-diagonal portion of the local submatrix (possibly different for
1788            each block row) or PETSC_NULL.
1789 
1790 
1791    Options Database Keys:
1792 +   -mat_no_unroll - uses code that does not unroll the loops in the
1793                      block calculations (much slower)
1794 -   -mat_block_size - size of the blocks to use
1795 
1796    Notes:
1797 
1798    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1799    then it must be used on all processors that share the object for that argument.
1800 
1801    If the *_nnz parameter is given then the *_nz parameter is ignored
1802 
1803    Storage Information:
1804    For a square global matrix we define each processor's diagonal portion
1805    to be its local rows and the corresponding columns (a square submatrix);
1806    each processor's off-diagonal portion encompasses the remainder of the
1807    local matrix (a rectangular submatrix).
1808 
1809    The user can specify preallocated storage for the diagonal part of
1810    the local submatrix with either d_nz or d_nnz (not both).  Set
1811    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1812    memory allocation.  Likewise, specify preallocated storage for the
1813    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1814 
1815    You can call MatGetInfo() to get information on how effective the preallocation was,
1816    for example via the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1817    You can also run with the option -info and look for messages with the string
1818    malloc in them to see if additional memory allocation was needed.
1819 
1820    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1821    the figure below we depict these three local rows and all columns (0-11).
1822 
1823 .vb
1824            0 1 2 3 4 5 6 7 8 9 10 11
1825           -------------------
1826    row 3  |  o o o d d d o o o o o o
1827    row 4  |  o o o d d d o o o o o o
1828    row 5  |  o o o d d d o o o o o o
1829           -------------------
1830 .ve
1831 
1832    Thus, any entries in the d locations are stored in the d (diagonal)
1833    submatrix, and any entries in the o locations are stored in the
1834    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1835    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1836 
1837    Now d_nz should indicate the number of block nonzeros per block row in the upper
1838    triangular plus diagonal part of the d matrix,
1839    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
1840    In general, for PDE problems in which most nonzeros are near the diagonal,
1841    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1842    or you will get TERRIBLE performance; see the users' manual chapter on
1843    matrices.
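
   As one concrete reading of the figure above (an illustration only, assuming
   bs=1 and every depicted entry nonzero): since only the upper triangle plus
   the diagonal is stored, the o entries in columns 0-2 lie below the diagonal
   and need no local storage here, so a consistent preallocation is
.vb
      d_nnz = {3,2,1}     upper-triangular part of rows 3,4,5 in the d block
      o_nnz = {6,6,6}     columns 6-11 in each row
.ve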
1844 
1845    Level: intermediate
1846 
1847 .keywords: matrix, block, aij, compressed row, sparse, parallel
1848 
1849 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1850 @*/
1851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1852 {
1853   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1854 
1855   PetscFunctionBegin;
1856   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1857   if (f) {
1858     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatCreateMPISBAIJ"
1865 /*@C
1866    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1867    (block compressed row).  For good matrix assembly performance
1868    the user should preallocate the matrix storage by setting the parameters
1869    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1870    performance can be increased by more than a factor of 50.
1871 
1872    Collective on MPI_Comm
1873 
1874    Input Parameters:
1875 +  comm - MPI communicator
1876 .  bs   - size of block
1877 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1878            This value should be the same as the local size used in creating the
1879            y vector for the matrix-vector product y = Ax.
1880 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1881            This value should be the same as the local size used in creating the
1882            x vector for the matrix-vector product y = Ax.
1883 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1884 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1885 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1886            submatrix  (same for all local rows)
1887 .  d_nnz - array containing the number of block nonzeros in the various block rows
1888            in the upper triangular and diagonal part of the diagonal portion of the local
1889            submatrix (possibly different for each block row), or PETSC_NULL.
1890            You must leave room for the diagonal entry even if it is zero.
1891 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1892            submatrix (same for all local rows).
1893 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1894            off-diagonal portion of the local submatrix (possibly different for
1895            each block row) or PETSC_NULL.
1896 
1897    Output Parameter:
1898 .  A - the matrix
1899 
1900    Options Database Keys:
1901 +   -mat_no_unroll - uses code that does not unroll the loops in the
1902                      block calculations (much slower)
1903 .   -mat_block_size - size of the blocks to use
1904 -   -mat_mpi - use the parallel matrix data structures even on one processor
1905                (defaults to using SeqSBAIJ format on one processor)
1906 
1907    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1908    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1909    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
1910 
1911    Notes:
1912    The number of rows and columns must be divisible by blocksize.
1913    This matrix type does not support complex Hermitian operations.
1914 
1915    The user MUST specify either the local or global matrix dimensions
1916    (possibly both).
1917 
1918    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1919    then it must be used on all processors that share the object for that argument.
1920 
1921    If the *_nnz parameter is given then the *_nz parameter is ignored
1922 
1923    Storage Information:
1924    For a square global matrix we define each processor's diagonal portion
1925    to be its local rows and the corresponding columns (a square submatrix);
1926    each processor's off-diagonal portion encompasses the remainder of the
1927    local matrix (a rectangular submatrix).
1928 
1929    The user can specify preallocated storage for the diagonal part of
1930    the local submatrix with either d_nz or d_nnz (not both).  Set
1931    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1932    memory allocation.  Likewise, specify preallocated storage for the
1933    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1934 
1935    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1936    the figure below we depict these three local rows and all columns (0-11).
1937 
1938 .vb
1939            0 1 2 3 4 5 6 7 8 9 10 11
1940           -------------------
1941    row 3  |  o o o d d d o o o o o o
1942    row 4  |  o o o d d d o o o o o o
1943    row 5  |  o o o d d d o o o o o o
1944           -------------------
1945 .ve
1946 
1947    Thus, any entries in the d locations are stored in the d (diagonal)
1948    submatrix, and any entries in the o locations are stored in the
1949    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1950    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1951 
1952    Now d_nz should indicate the number of block nonzeros per block row in the upper
1953    triangular plus diagonal part of the d matrix,
1954    and o_nz should indicate the number of block nonzeros per block row in the o matrix.
1955    In general, for PDE problems in which most nonzeros are near the diagonal,
1956    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1957    or you will get TERRIBLE performance; see the users' manual chapter on
1958    matrices.
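
   For example, a sketch (illustrative sizes only) that creates a blocksize-2
   matrix with rough per-block-row estimates and lets PETSc pick the local
   sizes:
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve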
1959 
1960    Level: intermediate
1961 
1962 .keywords: matrix, block, aij, compressed row, sparse, parallel
1963 
1964 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1965 @*/
1966 
1967 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1968 {
1969   PetscErrorCode ierr;
1970   PetscMPIInt    size;
1971 
1972   PetscFunctionBegin;
1973   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1974   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1975   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1976   if (size > 1) {
1977     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1978     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1979   } else {
1980     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1981     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1982   }
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1989 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1990 {
1991   Mat            mat;
1992   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1993   PetscErrorCode ierr;
1994   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
1995   PetscScalar    *array;
1996 
1997   PetscFunctionBegin;
1998   *newmat       = 0;
1999   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2000   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2001   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2002   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2003   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2004   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2005 
2006   mat->factor       = matin->factor;
2007   mat->preallocated = PETSC_TRUE;
2008   mat->assembled    = PETSC_TRUE;
2009   mat->insertmode   = NOT_SET_VALUES;
2010 
2011   a = (Mat_MPISBAIJ*)mat->data;
2012   a->bs2   = oldmat->bs2;
2013   a->mbs   = oldmat->mbs;
2014   a->nbs   = oldmat->nbs;
2015   a->Mbs   = oldmat->Mbs;
2016   a->Nbs   = oldmat->Nbs;
2017 
2018 
2019   a->size         = oldmat->size;
2020   a->rank         = oldmat->rank;
2021   a->donotstash   = oldmat->donotstash;
2022   a->roworiented  = oldmat->roworiented;
2023   a->rowindices   = 0;
2024   a->rowvalues    = 0;
2025   a->getrowactive = PETSC_FALSE;
2026   a->barray       = 0;
2027   a->rstartbs    = oldmat->rstartbs;
2028   a->rendbs      = oldmat->rendbs;
2029   a->cstartbs    = oldmat->cstartbs;
2030   a->cendbs      = oldmat->cendbs;
2031 
2032   /* hash table stuff */
2033   a->ht           = 0;
2034   a->hd           = 0;
2035   a->ht_size      = 0;
2036   a->ht_flag      = oldmat->ht_flag;
2037   a->ht_fact      = oldmat->ht_fact;
2038   a->ht_total_ct  = 0;
2039   a->ht_insert_ct = 0;
2040 
2041   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2042   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2043   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2044   if (oldmat->colmap) {
2045 #if defined (PETSC_USE_CTABLE)
2046     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2047 #else
2048     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2049     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2050     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2051 #endif
2052   } else a->colmap = 0;
2053 
2054   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2055     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2056     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2057     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2058   } else a->garray = 0;
2059 
2060   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2061   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2062   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2063   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2064 
2065   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2066   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2067   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2068   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2069 
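  /* slvec1 holds the bs*mbs local entries followed by the ghost entries;
     slvec1a, slvec1b and slvec0b are sub-vectors created on top of the same
     arrays rather than owning separate storage. */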
2070   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2071   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2072   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2073   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2074   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2075   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2076   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2077   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2078   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2079   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2080   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2081   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2082   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2083 
2084   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2085   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2086   a->sMvctx = oldmat->sMvctx;
2087   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2088 
2089   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2090   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2091   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2092   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2093   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2094   *newmat = mat;
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #include "petscsys.h"
2099 
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatLoad_MPISBAIJ"
2102 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2103 {
2104   Mat            A;
2105   PetscErrorCode ierr;
2106   PetscInt       i,nz,j,rstart,rend;
2107   PetscScalar    *vals,*buf;
2108   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2109   MPI_Status     status;
2110   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2111   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2112   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2113   PetscInt       bs=1,Mbs,mbs,extra_rows;
2114   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2115   PetscInt       dcount,kmax,k,nzcount,tmp;
2116   int            fd;
2117 
2118   PetscFunctionBegin;
2119   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2120     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2121   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2122 
2123   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2125   if (!rank) {
2126     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2127     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2128     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2129     if (header[3] < 0) {
2130       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2131     }
2132   }
2133 
2134   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2135   M = header[1]; N = header[2];
2136 
2137   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2138 
2139   /*
2140      This code adds extra rows to make sure the number of rows is
2141      divisible by the blocksize
2142   */
2143   Mbs        = M/bs;
2144   extra_rows = bs - M + bs*(Mbs);
2145   if (extra_rows == bs) extra_rows = 0;
2146   else                  Mbs++;
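  /*
     Worked example (illustrative numbers): M=10, bs=4 gives Mbs=2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and two padding rows bring
     the size to 12; with M=12, extra_rows = 4 - 12 + 12 = 4 == bs, which is
     reset to 0 and nothing is padded.
  */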
2147   if (extra_rows && !rank) {
2148     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2149   }
2150 
2151   /* determine ownership of all rows */
2152   mbs        = Mbs/size + ((Mbs % size) > rank);
2153   m          = mbs*bs;
2154   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2155   browners   = rowners + size + 1;
2156   mmbs       = PetscMPIIntCast(mbs);
2157   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2158   rowners[0] = 0;
2159   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2160   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2161   rstart = rowners[rank];
2162   rend   = rowners[rank+1];
2163 
2164   /* distribute row lengths to all processors */
2165   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2166   if (!rank) {
2167     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2168     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2169     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2170     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2171     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2172     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2173     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2174   } else {
2175     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2176   }
2177 
2178   if (!rank) {   /* procs[0] */
2179     /* calculate the number of nonzeros on each processor */
2180     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2181     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2182     for (i=0; i<size; i++) {
2183       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2184         procsnz[i] += rowlengths[j];
2185       }
2186     }
2187     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2188 
2189     /* determine max buffer needed and allocate it */
2190     maxnz = 0;
2191     for (i=0; i<size; i++) {
2192       maxnz = PetscMax(maxnz,procsnz[i]);
2193     }
2194     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2195 
2196     /* read in my part of the matrix column indices  */
2197     nz     = procsnz[0];
2198     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2199     mycols = ibuf;
2200     if (size == 1)  nz -= extra_rows;
2201     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2202     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2203 
2204     /* read in everyone else's part (except the last process's) and ship it off */
2205     for (i=1; i<size-1; i++) {
2206       nz   = procsnz[i];
2207       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2208       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2209     }
2210     /* read in the stuff for the last proc */
2211     if (size != 1) {
2212       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2213       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2214       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2215       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2216     }
2217     ierr = PetscFree(cols);CHKERRQ(ierr);
2218   } else {  /* procs[i], i>0 */
2219     /* determine buffer space needed for message */
2220     nz = 0;
2221     for (i=0; i<m; i++) {
2222       nz += locrowlens[i];
2223     }
2224     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2225     mycols = ibuf;
2226     /* receive message of column indices */
2227     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2228     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2229     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2230   }
2231 
2232   /* loop over local rows, determining number of off diagonal entries */
2233   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2234   odlens   = dlens + (rend-rstart);
2235   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2236   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2237   masked1  = mask    + Mbs;
2238   masked2  = masked1 + Mbs;
2239   rowcount = 0; nzcount = 0;
2240   for (i=0; i<mbs; i++) {
2241     dcount  = 0;
2242     odcount = 0;
2243     for (j=0; j<bs; j++) {
2244       kmax = locrowlens[rowcount];
2245       for (k=0; k<kmax; k++) {
2246         tmp = mycols[nzcount++]/bs; /* block col. index */
2247         if (!mask[tmp]) {
2248           mask[tmp] = 1;
2249           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2250           else masked1[dcount++] = tmp; /* entry in diag portion */
2251         }
2252       }
2253       rowcount++;
2254     }
2255 
2256     dlens[i]  = dcount;  /* d_nzz[i] */
2257     odlens[i] = odcount; /* o_nzz[i] */
2258 
2259     /* zero out the mask elements we set */
2260     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2261     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2262   }
2263 
2264   /* create our matrix */
2265   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2266   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2267   ierr = MatSetType(A,type);CHKERRQ(ierr);
2268   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2269   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2270 
2271   if (!rank) {
2272     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2273     /* read in my part of the matrix numerical values  */
2274     nz = procsnz[0];
2275     vals = buf;
2276     mycols = ibuf;
2277     if (size == 1)  nz -= extra_rows;
2278     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2279     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2280 
2281     /* insert into matrix */
2282     jj      = rstart*bs;
2283     for (i=0; i<m; i++) {
2284       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2285       mycols += locrowlens[i];
2286       vals   += locrowlens[i];
2287       jj++;
2288     }
2289 
2290     /* read in the other processors' parts (except the last one's) and ship them out */
2291     for (i=1; i<size-1; i++) {
2292       nz   = procsnz[i];
2293       vals = buf;
2294       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2295       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2296     }
2297     /* the last proc */
2298     if (size != 1){
2299       nz   = procsnz[i] - extra_rows;
2300       vals = buf;
2301       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2302       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2303       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2304     }
2305     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2306 
2307   } else {
2308     /* receive numeric values */
2309     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2310 
2311     /* receive message of values */
2312     vals   = buf;
2313     mycols = ibuf;
2314     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2315     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2316     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2317 
2318     /* insert into matrix */
2319     jj      = rstart*bs;
2320     for (i=0; i<m; i++) {
2321       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2322       mycols += locrowlens[i];
2323       vals   += locrowlens[i];
2324       jj++;
2325     }
2326   }
2327 
2328   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2329   ierr = PetscFree(buf);CHKERRQ(ierr);
2330   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2331   ierr = PetscFree(rowners);CHKERRQ(ierr);
2332   ierr = PetscFree(dlens);CHKERRQ(ierr);
2333   ierr = PetscFree(mask);CHKERRQ(ierr);
2334   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2335   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2336   *newmat = A;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2342 /*XXXXX@
2343    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2344 
2345    Input Parameters:
2346 .  mat  - the matrix
2347 .  fact - factor
2348 
2349    Collective on Mat
2350 
2351    Level: advanced
2352 
2353   Notes:
2354    This can also be set by the command line option: -mat_use_hash_table fact
2355 
2356 .keywords: matrix, hashtable, factor, HT
2357 
2358 .seealso: MatSetOption()
2359 @XXXXX*/
2360 
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2364 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2365 {
2366   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2367   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2368   PetscReal      atmp;
2369   PetscReal      *work,*svalues,*rvalues;
2370   PetscErrorCode ierr;
2371   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2372   PetscMPIInt    rank,size;
2373   PetscInt       *rowners_bs,dest,count,source;
2374   PetscScalar    *va;
2375   MatScalar      *ba;
2376   MPI_Status     stat;
2377 
2378   PetscFunctionBegin;
2379   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
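  /*
     Strategy: the diagonal block contributes through MatGetRowMaxAbs(a->A)
     below; each stored off-diagonal entry (i,j), j > i, also stands for the
     symmetric entry (j,i), so its magnitude updates the local maximum of
     row i and is accumulated in work[] for row j, whose segment is then
     sent to the process owning those rows.
  */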
2380   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2381   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2382 
2383   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2384   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2385 
2386   bs   = A->rmap->bs;
2387   mbs  = a->mbs;
2388   Mbs  = a->Mbs;
2389   ba   = b->a;
2390   bi   = b->i;
2391   bj   = b->j;
2392 
2393   /* find ownerships */
2394   rowners_bs = A->rmap->range;
2395 
2396   /* each proc creates an array to be distributed */
2397   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2398   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2399 
2400   /* row_max for B */
2401   if (rank != size-1){
2402     for (i=0; i<mbs; i++) {
2403       ncols = bi[1] - bi[0]; bi++;
2404       brow  = bs*i;
2405       for (j=0; j<ncols; j++){
2406         bcol = bs*(*bj);
2407         for (kcol=0; kcol<bs; kcol++){
2408           col = bcol + kcol;                 /* local col index */
2409           col += rowners_bs[rank+1];      /* global col index */
2410           for (krow=0; krow<bs; krow++){
2411             atmp = PetscAbsScalar(*ba); ba++;
2412             row = brow + krow;    /* local row index */
2413             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2414             if (work[col] < atmp) work[col] = atmp;
2415           }
2416         }
2417         bj++;
2418       }
2419     }
2420 
2421     /* send values to its owners */
2422     for (dest=rank+1; dest<size; dest++){
2423       svalues = work + rowners_bs[dest];
2424       count   = rowners_bs[dest+1]-rowners_bs[dest];
2425       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2426     }
2427   }
2428 
2429   /* receive values */
2430   if (rank){
2431     rvalues = work;
2432     count   = rowners_bs[rank+1]-rowners_bs[rank];
2433     for (source=0; source<rank; source++){
2434       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2435       /* process values */
2436       for (i=0; i<count; i++){
2437         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2438       }
2439     }
2440   }
2441 
2442   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2443   ierr = PetscFree(work);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatSOR_MPISBAIJ"
2449 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2450 {
2451   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2452   PetscErrorCode ierr;
2453   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2454   PetscScalar    *x,*b,*ptr,*from;
2455   Vec            bb1;
2456 
2457   PetscFunctionBegin;
2458   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2459   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2460 
2461   if (flag == SOR_APPLY_UPPER) {
2462     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2463     PetscFunctionReturn(0);
2464   }
2465 
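  /*
     In the symmetric-sweep branch below only the upper triangle is stored:
     A holds the diagonal block and B the strictly upper off-diagonal blocks.
     The local right-hand side is therefore assembled as, roughly,
     bb1 = bb - B*x_ghost - B^T*x_local, with the B^T and ghost-x pieces
     exchanged through the slvec0 -> slvec1 scatter.
  */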
2466   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2467     if (flag & SOR_ZERO_INITIAL_GUESS) {
2468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2469       its--;
2470     }
2471 
2472     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2473     while (its--){
2474 
2475       /* lower triangular part: slvec0b = B^T*xx (negated below together with the rest of slvec0) */
2476       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2477 
2478       /* copy xx into slvec0a */
2479       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2480       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2481       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2482       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2483 
2484       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2485 
2486       /* copy bb into slvec1a */
2487       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2488       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2489       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2490       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2491 
2492       /* set slvec1b = 0 */
2493       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2494 
2495       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2496       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2497       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2498       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2499 
2500       /* upper triangular part: bb1 = bb1 - B*x */
2501       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2502 
2503       /* local diagonal sweep */
2504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2505     }
2506     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2507   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2508     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2509   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2510     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2511   } else if (flag & SOR_EISENSTAT) {
2512     Vec               xx1;
2513     PetscTruth        hasop;
2514     const PetscScalar *diag;
2515     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2516     PetscInt          i,n;
2517 
2518     if (!mat->xx1) {
2519       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2520       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2521     }
2522     xx1 = mat->xx1;
2523     bb1 = mat->bb1;
2524 
2525     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2526 
2527     if (!mat->diag) {
2528       /* this is wrong if the same matrix is later given new nonzero values */
2529       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2530       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2531     }
2532     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2533 
2534     if (hasop) {
2535       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2536       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2537     } else {
2538       /*
2539           These two lines are replaced by code that may be a bit faster for a good compiler
2540       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2541       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2542       */
2543       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2544       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2545       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2546       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2547       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2548       if (omega == 1.0) {
2549 	for (i=0; i<n; i++) {
2550 	  sl[i] = b[i] - diag[i]*x[i];
2551 	}
2552         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2553       } else {
2554 	for (i=0; i<n; i++) {
2555 	  sl[i] = b[i] + scale*diag[i]*x[i];
2556 	}
2557         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2558       }
2559       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2560       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2561       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2562       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2563     }
2564 
2565     /* multiply off-diagonal portion of matrix */
2566     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2567     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2568     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2569     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2570     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2571     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2572     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2573     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2574     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2575     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2576 
2577     /* local sweep */
2578     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2579     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2580   } else {
2581     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2588 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2589 {
2590   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2591   PetscErrorCode ierr;
2592   Vec            lvec1,bb1;
2593 
2594   PetscFunctionBegin;
2595   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2596   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2597 
2598   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2599     if (flag & SOR_ZERO_INITIAL_GUESS) {
2600       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2601       its--;
2602     }
2603 
2604     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2605     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2606     while (its--){
2607       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2608 
2609       /* lower diagonal part: bb1 = bb - B^T*xx */
2610       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2611       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2612 
2613       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2614       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2615       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2616 
2617       /* upper diagonal part: bb1 = bb1 - B*x */
2618       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2619       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2620 
2621       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2622 
2623       /* diagonal sweep */
2624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2625     }
2626     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2627     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2628   } else {
2629     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2630   }
2631   PetscFunctionReturn(0);
2632 }
2633 
2634