xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c05d87d6c2fa1d0952fe676de20339d09daa79bc)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
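
/*
   A minimal store/retrieve usage sketch (assumptions: `mat' is an already
   assembled MATMPISBAIJ whose nonzero pattern will not change, and the
   helper name is hypothetical).  The pair above simply forwards to the
   sequential diagonal block (A) and off-diagonal block (B).
*/
static PetscErrorCode MatStoreRetrieveExample_Sketch(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* freeze the nonzero pattern so the stored value arrays keep their layout */
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the current values */
  ierr = MatZeroEntries(mat);CHKERRQ(ierr);    /* ... overwrite the values ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot */
  PetscFunctionReturn(0);
}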
52 
53 
54 #define CHUNKSIZE  10
55 
56 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
57 { \
58  \
59     brow = row/bs;  \
60     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
61     rmax = aimax[brow]; nrow = ailen[brow]; \
62       bcol = col/bs; \
63       ridx = row % bs; cidx = col % bs; \
64       low = 0; high = nrow; \
65       while (high-low > 3) { \
66         t = (low+high)/2; \
67         if (rp[t] > bcol) high = t; \
68         else              low  = t; \
69       } \
70       for (_i=low; _i<high; _i++) { \
71         if (rp[_i] > bcol) break; \
72         if (rp[_i] == bcol) { \
73           bap  = ap +  bs2*_i + bs*cidx + ridx; \
74           if (addv == ADD_VALUES) *bap += value;  \
75           else                    *bap  = value;  \
76           goto a_noinsert; \
77         } \
78       } \
79       if (a->nonew == 1) goto a_noinsert; \
80       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
81       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
82       N = nrow++ - 1;  \
83       /* shift up all the later entries in this row */ \
84       for (ii=N; ii>=_i; ii--) { \
85         rp[ii+1] = rp[ii]; \
86         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
87       } \
88       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
89       rp[_i]                      = bcol;  \
90       ap[bs2*_i + bs*cidx + ridx] = value;  \
91       a_noinsert:; \
92     ailen[brow] = nrow; \
93 }
94 
95 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
96 { \
97     brow = row/bs;  \
98     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
99     rmax = bimax[brow]; nrow = bilen[brow]; \
100       bcol = col/bs; \
101       ridx = row % bs; cidx = col % bs; \
102       low = 0; high = nrow; \
103       while (high-low > 3) { \
104         t = (low+high)/2; \
105         if (rp[t] > bcol) high = t; \
106         else              low  = t; \
107       } \
108       for (_i=low; _i<high; _i++) { \
109         if (rp[_i] > bcol) break; \
110         if (rp[_i] == bcol) { \
111           bap  = ap +  bs2*_i + bs*cidx + ridx; \
112           if (addv == ADD_VALUES) *bap += value;  \
113           else                    *bap  = value;  \
114           goto b_noinsert; \
115         } \
116       } \
117       if (b->nonew == 1) goto b_noinsert; \
118       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
119       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
120       N = nrow++ - 1;  \
121       /* shift up all the later entries in this row */ \
122       for (ii=N; ii>=_i; ii--) { \
123         rp[ii+1] = rp[ii]; \
124         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
125       } \
126       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
127       rp[_i]                      = bcol;  \
128       ap[bs2*_i + bs*cidx + ridx] = value;  \
129       b_noinsert:; \
130     bilen[brow] = nrow; \
131 }
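
/*
   Both macros above locate a block column inside a sorted row segment with a
   short bisection that stops once at most three candidates remain, followed
   by a linear scan.  A self-contained sketch of that search (the helper name
   is hypothetical; nothing below uses it):
*/
static PetscInt MatFindBlockColumn_Sketch(const PetscInt rp[],PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {           /* bisect until the window is tiny */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {       /* finish with a linear scan */
    if (rp[i] == bcol) return i;   /* found the block column */
    if (rp[i] > bcol)  break;      /* rp[] is sorted, so it cannot appear later */
  }
  return -1;                       /* absent: the caller must insert it */
}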
132 
133 /* Only add/insert a(i,j) with i<=j (blocks).
134    Any a(i,j) with i>j input by the user is ignored.
135 */
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetValues_MPISBAIJ"
138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
139 {
140   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
141   MatScalar      value;
142   PetscTruth     roworiented = baij->roworiented;
143   PetscErrorCode ierr;
144   PetscInt       i,j,row,col;
145   PetscInt       rstart_orig=mat->rmap->rstart;
146   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
147   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
148 
149   /* Some variables required by the macros */
150   Mat            A = baij->A;
151   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
152   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
153   MatScalar      *aa=a->a;
154 
155   Mat            B = baij->B;
156   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
157   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
158   MatScalar     *ba=b->a;
159 
160   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
161   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
162   MatScalar     *ap,*bap;
163 
164   /* for stash */
165   PetscInt      n_loc, *in_loc = PETSC_NULL;
166   MatScalar     *v_loc = PETSC_NULL;
167 
168   PetscFunctionBegin;
169   if (v) PetscValidScalarPointer(v,6);
170   if (!baij->donotstash){
171     if (n > baij->n_loc) {
172       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
176       baij->n_loc = n;
177     }
178     in_loc = baij->in_loc;
179     v_loc  = baij->v_loc;
180   }
181 
182   for (i=0; i<m; i++) {
183     if (im[i] < 0) continue;
184 #if defined(PETSC_USE_DEBUG)
185     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
186 #endif
187     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
188       row = im[i] - rstart_orig;              /* local row index */
189       for (j=0; j<n; j++) {
190         if (im[i]/bs > in[j]/bs){
191           if (a->ignore_ltriangular){
192             continue;    /* ignore lower triangular blocks */
193           } else {
194             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
195           }
196         }
197         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
198           col = in[j] - cstart_orig;          /* local col index */
199           brow = row/bs; bcol = col/bs;
200           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
201           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
202           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
203           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
204         } else if (in[j] < 0) continue;
205 #if defined(PETSC_USE_DEBUG)
206         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
207 #endif
208         else {  /* off-diag entry (B) */
209           if (mat->was_assembled) {
210             if (!baij->colmap) {
211               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
212             }
213 #if defined (PETSC_USE_CTABLE)
214             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
215             col  = col - 1;
216 #else
217             col = baij->colmap[in[j]/bs] - 1;
218 #endif
219             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
220               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
221               col =  in[j];
222               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
223               B = baij->B;
224               b = (Mat_SeqBAIJ*)(B)->data;
225               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226               ba=b->a;
227             } else col += in[j]%bs;
228           } else col = in[j];
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (!baij->donotstash) {
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
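
/*
   A caller-side sketch of the convention above (assumptions: `mat' is a
   MATMPISBAIJ with block size 1 and the helper name is hypothetical).  Only
   entries with i <= j are stored; with MAT_IGNORE_LOWER_TRIANGULAR set,
   lower-triangular input is dropped instead of raising an error:
*/
static PetscErrorCode MatSetUpperEntryExample_Sketch(Mat mat,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* kept only if i <= j */
  PetscFunctionReturn(0);
}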
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscTruth      roworiented = baij->roworiented;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if (!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy of the block is not required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if ((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++  = *value++;
300             }
301           }
302           barray -= bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend){
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         }
309         else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined (PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined (PETSC_USE_CTABLE)
330 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col =  in[j];
338             }
339           }
340           else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
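
/*
   Block addressing used above, written out: for row-oriented input the
   (m*bs) x (n*bs) array `v' is laid out by scalar rows, so block (i,j)
   starts at v + i*(stepval+bs)*bs + j*bs with stepval = (n-1)*bs, and
   consecutive scalar rows of the block are stepval+bs apart.  A
   self-contained sketch that packs one block (hypothetical helper):
*/
static void MatPackBlockRowOriented_Sketch(const MatScalar *v,PetscInt n,PetscInt bs,PetscInt i,PetscInt j,MatScalar *block)
{
  const PetscInt  stepval = (n-1)*bs;
  const MatScalar *value  = v + i*(stepval+bs)*bs + j*bs;
  PetscInt        ii,jj;

  for (ii=0; ii<bs; ii++,value+=stepval) {  /* one scalar row of the block at a time */
    for (jj=0; jj<bs; jj++) *block++ = *value++;
  }
}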
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend){
376           col = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined (PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data--;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else {
396       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
397     }
398   }
399   PetscFunctionReturn(0);
400 }
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "MatNorm_MPISBAIJ"
404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
405 {
406   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
407   PetscErrorCode ierr;
408   PetscReal      sum[2],*lnorm2;
409 
410   PetscFunctionBegin;
411   if (baij->size == 1) {
412     ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
413   } else {
414     if (type == NORM_FROBENIUS) {
415       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
416       ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
418       ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
420       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
421       *norm = sqrt(sum[0] + 2*sum[1]);
422       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
423     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
424       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
425       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
426       PetscReal    *rsum,*rsum2,vabs;
427       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
428       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
429       MatScalar    *v;
430 
431       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
432       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
433       /* Amat */
434       v = amat->a; jj = amat->j;
435       for (brow=0; brow<mbs; brow++) {
436         grow = bs*(rstart + brow);
437         nz = amat->i[brow+1] - amat->i[brow];
438         for (bcol=0; bcol<nz; bcol++){
439           gcol = bs*(rstart + *jj); jj++;
440           for (col=0; col<bs; col++){
441             for (row=0; row<bs; row++){
442               vabs = PetscAbsScalar(*v); v++;
443               rsum[gcol+col] += vabs;
444               /* non-diagonal block */
445               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
446             }
447           }
448         }
449       }
450       /* Bmat */
451       v = bmat->a; jj = bmat->j;
452       for (brow=0; brow<mbs; brow++) {
453         grow = bs*(rstart + brow);
454         nz = bmat->i[brow+1] - bmat->i[brow];
455         for (bcol=0; bcol<nz; bcol++){
456           gcol = bs*garray[*jj]; jj++;
457           for (col=0; col<bs; col++){
458             for (row=0; row<bs; row++){
459               vabs = PetscAbsScalar(*v); v++;
460               rsum[gcol+col] += vabs;
461               rsum[grow+row] += vabs;
462             }
463           }
464         }
465       }
466       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
467       *norm = 0.0;
468       for (col=0; col<mat->cmap->N; col++) {
469         if (rsum2[col] > *norm) *norm = rsum2[col];
470       }
471       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
472     } else {
473       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
474     }
475   }
476   PetscFunctionReturn(0);
477 }
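
/*
   The Frobenius branch above, written out: with the locally stored pieces
   A_loc (diagonal block, SeqSBAIJ) and B_loc (strictly off-diagonal,
   SeqBAIJ) on each process p, and assuming MatNorm() on the SeqSBAIJ part
   already accounts for the mirrored lower triangle of the diagonal block,

      ||M||_F = sqrt( sum_p ||A_loc^p||_F^2 + 2 * sum_p ||B_loc^p||_F^2 )

   because every stored entry of B represents both M(i,j) and M(j,i).
*/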
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
481 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
484   PetscErrorCode ierr;
485   PetscInt       nstash,reallocs;
486   InsertMode     addv;
487 
488   PetscFunctionBegin;
489   if (baij->donotstash) {
490     PetscFunctionReturn(0);
491   }
492 
493   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
494   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
495   if (addv == (ADD_VALUES|INSERT_VALUES)) {
496     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
497   }
498   mat->insertmode = addv; /* in case this processor had no cache */
499 
500   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
501   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
502   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
503   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
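
/*
   A minimal assembly sketch around the stash machinery above (assumptions:
   `mat' is a MATMPISBAIJ, entry (i,j) may be owned by another process, and
   the helper name is hypothetical):
*/
static PetscErrorCode MatAssembleExample_Sketch(Mat mat,PetscInt i,PetscInt j,PetscScalar v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* off-process entries are stashed */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* starts the stash scatters */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    /* drains them into A and B */
  PetscFunctionReturn(0);
}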
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
511 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
512 {
513   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
514   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
515   PetscErrorCode ierr;
516   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
517   PetscInt       *row,*col;
518   PetscTruth     other_disassembled;
519   PetscMPIInt    n;
520   PetscTruth     r1,r2,r3;
521   MatScalar      *val;
522   InsertMode     addv = mat->insertmode;
523 
524   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
525   PetscFunctionBegin;
526 
527   if (!baij->donotstash) {
528     while (1) {
529       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
530       if (!flg) break;
531 
532       for (i=0; i<n;) {
533         /* Now identify the consecutive vals belonging to the same row */
534         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
535         if (j < n) ncols = j-i;
536         else       ncols = n-i;
537         /* Now assemble all these values with a single function call */
538         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
539         i = j;
540       }
541     }
542     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
543     /* Now process the block-stash. The values are stashed column-oriented, so
544        temporarily clear the row-oriented flags and restore the original flags
545        after the MatSetValuesBlocked() calls */
546     r1 = baij->roworiented;
547     r2 = a->roworiented;
548     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
549     baij->roworiented = PETSC_FALSE;
550     a->roworiented    = PETSC_FALSE;
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
552     while (1) {
553       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
554       if (!flg) break;
555 
556       for (i=0; i<n;) {
557         /* Now identify the consecutive vals belonging to the same row */
558         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566     baij->roworiented = r1;
567     a->roworiented    = r2;
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled; if so we must
575      also disassemble ourselves, in order that we may reassemble. */
576   /*
577      if the nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled, so this step can be skipped
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
591   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
592   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
593 
594   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
595   baij->rowvalues = 0;
596 
597   PetscFunctionReturn(0);
598 }
599 
600 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
601 #undef __FUNCT__
602 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
603 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
604 {
605   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
606   PetscErrorCode    ierr;
607   PetscInt          bs = mat->rmap->bs;
608   PetscMPIInt       size = baij->size,rank = baij->rank;
609   PetscTruth        iascii,isdraw;
610   PetscViewer       sviewer;
611   PetscViewerFormat format;
612 
613   PetscFunctionBegin;
614   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
616   if (iascii) {
617     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
618     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
619       MatInfo info;
620       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
621       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
623               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
624               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
625       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
629       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
630       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
631       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
632       PetscFunctionReturn(0);
633     } else if (format == PETSC_VIEWER_ASCII_INFO) {
634       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
637       PetscFunctionReturn(0);
638     }
639   }
640 
641   if (isdraw) {
642     PetscDraw  draw;
643     PetscTruth isnull;
644     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
645     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
646   }
647 
648   if (size == 1) {
649     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
650     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
651   } else {
652     /* assemble the entire matrix onto first processor. */
653     Mat          A;
654     Mat_SeqSBAIJ *Aloc;
655     Mat_SeqBAIJ  *Bloc;
656     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
657     MatScalar    *a;
658 
659     /* Should this be the same type as mat? */
660     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
661     if (!rank) {
662       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
663     } else {
664       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
665     }
666     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
667     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
668     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
669 
670     /* copy over the A part */
671     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
672     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
673     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
674 
675     for (i=0; i<mbs; i++) {
676       rvals[0] = bs*(baij->rstartbs + i);
677       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
678       for (j=ai[i]; j<ai[i+1]; j++) {
679         col = (baij->cstartbs+aj[j])*bs;
680         for (k=0; k<bs; k++) {
681           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
682           col++; a += bs;
683         }
684       }
685     }
686     /* copy over the B part */
687     Bloc = (Mat_SeqBAIJ*)baij->B->data;
688     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
689     for (i=0; i<mbs; i++) {
690 
691       rvals[0] = bs*(baij->rstartbs + i);
692       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
693       for (j=ai[i]; j<ai[i+1]; j++) {
694         col = baij->garray[aj[j]]*bs;
695         for (k=0; k<bs; k++) {
696           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
697           col++; a += bs;
698         }
699       }
700     }
701     ierr = PetscFree(rvals);CHKERRQ(ierr);
702     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
703     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     /*
705        Everyone has to call to draw the matrix since the graphics waits are
706        synchronized across all processors that share the PetscDraw object
707     */
708     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
709     if (!rank) {
710       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
711       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
714     ierr = MatDestroy(A);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatView_MPISBAIJ"
721 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscTruth     iascii,isdraw,issocket,isbinary;
725 
726   PetscFunctionBegin;
727   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
731   if (iascii || isdraw || issocket || isbinary) {
732     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
733   } else {
734     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDestroy_MPISBAIJ"
741 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
742 {
743   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747 #if defined(PETSC_USE_LOG)
748   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
749 #endif
750   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
751   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
752   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
754 #if defined (PETSC_USE_CTABLE)
755   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
756 #else
757   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
758 #endif
759   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
760   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
761   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
762   if (baij->slvec0) {
763     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
764     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
765   }
766   if (baij->slvec1) {
767     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
768     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
770   }
771   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
772   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
773   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
774   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
775   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
776   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
777   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
778 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
779   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
780 #endif
781   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
784   ierr = PetscFree(baij);CHKERRQ(ierr);
785 
786   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
797 {
798   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
801   PetscScalar    *x,*from;
802 
803   PetscFunctionBegin;
804   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
805   if (nt != A->cmap->n) {
806     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
807   }
808 
809   /* diagonal part */
810   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
811   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
812 
813   /* subdiagonal part */
814   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
815 
816   /* copy x into the vec slvec0 */
817   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819 
820   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
821   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   /* superdiagonal part */
827   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatMult_MPISBAIJ"
833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
834 {
835   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
836   PetscErrorCode ierr;
837   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
838   PetscScalar    *x,*from;
839 
840   PetscFunctionBegin;
841   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
842   if (nt != A->cmap->n) {
843     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844   }
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
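
/*
   As read from the two kernels above: with x_g the gathered ghost values of
   x, each process computes

      y_loc = A_loc x_loc + B_loc x_g + (pieces of B_loc^T x_loc scattered
              back to their owning processes),

   which equals M x for the full symmetric M even though only the upper
   block triangle is stored.  slvec0 = [x_loc ; B_loc^T x_loc] is what the
   single forward scatter sMvctx sends, and slvec1 = [A_loc x_loc ; x_g]
   (plus the received B^T pieces) is what it accumulates into.
*/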
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) {
879     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
880   }
881   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
882   if (nt != A->rmap->n) {
883     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
884   }
885 
886   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   /* do diagonal part */
888   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
889   /* do superdiagonal part */
890   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
892   /* do subdiagonal part */
893   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
894   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
895   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
896 
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
902 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
903 {
904   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
907   PetscScalar    *x,*from,zero=0.0;
908 
909   PetscFunctionBegin;
910   /*
911   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
912   PetscSynchronizedFlush(((PetscObject)A)->comm);
913   */
914   /* diagonal part */
915   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
916   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
917 
918   /* subdiagonal part */
919   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
920 
921   /* copy x into the vec slvec0 */
922   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
923   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
924   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
925   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
926 
927   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930 
931   /* superdiagonal part */
932   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
933 
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
940 {
941   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   /* do diagonal part */
947   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
948   /* do superdiagonal part */
949   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
951 
952   /* do subdiagonal part */
953   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
954   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
955   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
956 
957   PetscFunctionReturn(0);
958 }
959 
960 /*
961   This only works correctly for square matrices where the subblock A->A is the
962    diagonal block
963 */
964 #undef __FUNCT__
965 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
966 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
967 {
968   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
973   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatScale_MPISBAIJ"
979 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
980 {
981   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
986   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatGetRow_MPISBAIJ"
992 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
993 {
994   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
995   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
996   PetscErrorCode ierr;
997   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
998   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
999   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1000 
1001   PetscFunctionBegin;
1002   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1003   mat->getrowactive = PETSC_TRUE;
1004 
1005   if (!mat->rowvalues && (idx || v)) {
1006     /*
1007         allocate enough space to hold information from the longest row.
1008     */
1009     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1010     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1011     PetscInt     max = 1,mbs = mat->mbs,tmp;
1012     for (i=0; i<mbs; i++) {
1013       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1014       if (max < tmp) { max = tmp; }
1015     }
1016     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1017   }
1018 
1019   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1020   lrow = row - brstart;  /* local row index */
1021 
1022   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1023   if (!v)   {pvA = 0; pvB = 0;}
1024   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1025   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1026   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1027   nztot = nzA + nzB;
1028 
1029   cmap  = mat->garray;
1030   if (v  || idx) {
1031     if (nztot) {
1032       /* Sort by increasing column numbers, assuming A and B already sorted */
1033       PetscInt imark = -1;
1034       if (v) {
1035         *v = v_p = mat->rowvalues;
1036         for (i=0; i<nzB; i++) {
1037           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1038           else break;
1039         }
1040         imark = i;
1041         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1042         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1043       }
1044       if (idx) {
1045         *idx = idx_p = mat->rowindices;
1046         if (imark > -1) {
1047           for (i=0; i<imark; i++) {
1048             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1049           }
1050         } else {
1051           for (i=0; i<nzB; i++) {
1052             if (cmap[cworkB[i]/bs] < cstart)
1053               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054             else break;
1055           }
1056           imark = i;
1057         }
1058         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1059         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1060       }
1061     } else {
1062       if (idx) *idx = 0;
1063       if (v)   *v   = 0;
1064     }
1065   }
1066   *nz = nztot;
1067   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1068   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
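
/*
   A caller-side sketch of the getrow pair above (assumptions: global row
   `row' is owned by this process and the helper name is hypothetical):
*/
static PetscErrorCode MatRowSumExample_Sketch(Mat mat,PetscInt row,PetscScalar *rowsum)
{
  PetscErrorCode    ierr;
  PetscInt          nz,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr); /* allow row access to the SBAIJ part */
  ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  *rowsum = 0.0;
  for (j=0; j<nz; j++) *rowsum += vals[j];
  ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}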
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) {
1080     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1081   }
1082   baij->getrowactive = PETSC_FALSE;
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1088 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1089 {
1090   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1091   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1092 
1093   PetscFunctionBegin;
1094   aA->getrow_utriangular = PETSC_TRUE;
1095   PetscFunctionReturn(0);
1096 }
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1099 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1100 {
1101   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1102   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1103 
1104   PetscFunctionBegin;
1105   aA->getrow_utriangular = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1111 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1112 {
1113   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1118   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1124 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1131   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1137 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1138 {
1139   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1144   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1150 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1153   Mat            A = a->A,B = a->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = (PetscReal)matin->rmap->bs;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   } else {
1186     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1187   }
1188   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1189   info->fill_ratio_needed = 0;
1190   info->factor_mallocs    = 0;
1191   PetscFunctionReturn(0);
1192 }
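
/*
   Usage sketch for the reductions above (assumptions: `mat' is assembled
   and the helper name is hypothetical): gather the total number of stored
   nonzeros across all processes.
*/
static PetscErrorCode MatGlobalNonzerosExample_Sketch(Mat mat,PetscReal *nz_used)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  *nz_used = (PetscReal)info.nz_used;
  PetscFunctionReturn(0);
}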
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1196 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1197 {
1198   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1199   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NEW_NONZERO_LOCATIONS:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1207   case MAT_KEEP_NONZERO_PATTERN:
1208   case MAT_NEW_NONZERO_LOCATION_ERR:
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_ROW_ORIENTED:
1213     a->roworiented = flg;
1214     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1216     break;
1217   case MAT_NEW_DIAGONALS:
1218     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1219     break;
1220   case MAT_IGNORE_OFF_PROC_ENTRIES:
1221     a->donotstash = flg;
1222     break;
1223   case MAT_USE_HASH_TABLE:
1224     a->ht_flag = flg;
1225     break;
1226   case MAT_HERMITIAN:
1227     if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1228     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1229     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1230     break;
1231   case MAT_SYMMETRIC:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   case MAT_STRUCTURALLY_SYMMETRIC:
1235     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1236     break;
1237   case MAT_SYMMETRY_ETERNAL:
1238     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1239     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1240     break;
1241   case MAT_IGNORE_LOWER_TRIANGULAR:
1242     aA->ignore_ltriangular = flg;
1243     break;
1244   case MAT_ERROR_LOWER_TRIANGULAR:
1245     aA->ignore_ltriangular = flg;
1246     break;
1247   case MAT_GETROW_UPPERTRIANGULAR:
1248     aA->getrow_utriangular = flg;
1249     break;
1250   default:
1251     SETERRQ1(PETSC_ERR_SUP,"Unknown option %d",(int)op);
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1258 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1259 {
1260   PetscErrorCode ierr;
1261   PetscFunctionBegin;
1262   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1263     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1270 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1271 {
1272   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1273   Mat            a=baij->A, b=baij->B;
1274   PetscErrorCode ierr;
1275   PetscInt       nv,m,n;
1276   PetscTruth     flg;
1277 
1278   PetscFunctionBegin;
1279   if (ll != rr){
1280     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1281     if (!flg)
1282       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1283   }
1284   if (!ll) PetscFunctionReturn(0);
1285 
1286   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1287   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1288 
1289   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1290   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1291 
1292   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293 
1294   /* left diagonalscale the off-diagonal part */
1295   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1296 
1297   /* scale the diagonal part */
1298   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1299 
1300   /* right diagonalscale the off-diagonal part */
1301   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
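
/*
   Because only the upper block triangle is stored, scaling must take the
   symmetric form M <- D M D (same vector on both sides), hence the
   ll == rr check above.  A one-line usage sketch (hypothetical helper;
   `d' holds the diagonal of D):
*/
static PetscErrorCode MatSymmetricScaleExample_Sketch(Mat mat,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr); /* M <- D M D keeps M symmetric */
  PetscFunctionReturn(0);
}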
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1308 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1309 {
1310   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatEqual_MPISBAIJ"
1322 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1323 {
1324   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1325   Mat            a,b,c,d;
1326   PetscTruth     flg;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   a = matA->A; b = matA->B;
1331   c = matB->A; d = matB->B;
1332 
1333   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1334   if (flg) {
1335     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1336   }
1337   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatCopy_MPISBAIJ"
1343 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1344 {
1345   PetscErrorCode ierr;
1346   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1347   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1348 
1349   PetscFunctionBegin;
1350   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1351   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1352     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1353     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1354     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1355   } else {
1356     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1357     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1364 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1365 {
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #include "petscblaslapack.h"
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1377 {
1378   PetscErrorCode ierr;
1379   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1380   PetscBLASInt   bnz,one=1;
1381   Mat_SeqSBAIJ   *xa,*ya;
1382   Mat_SeqBAIJ    *xb,*yb;
1383 
1384   PetscFunctionBegin;
1385   if (str == SAME_NONZERO_PATTERN) {
1386     PetscScalar alpha = a;
1387     xa = (Mat_SeqSBAIJ *)xx->A->data;
1388     ya = (Mat_SeqSBAIJ *)yy->A->data;
1389     bnz = PetscBLASIntCast(xa->nz);
1390     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1391     xb = (Mat_SeqBAIJ *)xx->B->data;
1392     yb = (Mat_SeqBAIJ *)yy->B->data;
1393     bnz = PetscBLASIntCast(xb->nz);
1394     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1395   } else {
1396     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1397     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1398     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
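/*
   Usage sketch (illustrative): when Y and X have matching nonzero patterns
   the BLAS fast path above is taken, otherwise MatAXPY_Basic() is used.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/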
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1406 {
1407   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1408   PetscInt        rbs,cbs;
1409   PetscErrorCode  ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1413   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1414   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1416   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Cannot set block size %D: SBAIJ row block size is already %D",bs,rbs);
1417   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Cannot set block size %D: SBAIJ column block size is already %D",bs,cbs);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1424 {
1425   PetscErrorCode ierr;
1426   PetscInt       i;
1427   PetscTruth     flg;
1428 
1429   PetscFunctionBegin;
1430   for (i=0; i<n; i++) {
1431     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1432     if (!flg) {
1433       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1434     }
1435   }
1436   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 
1441 /* -------------------------------------------------------------------*/
1442 static struct _MatOps MatOps_Values = {
1443        MatSetValues_MPISBAIJ,
1444        MatGetRow_MPISBAIJ,
1445        MatRestoreRow_MPISBAIJ,
1446        MatMult_MPISBAIJ,
1447 /* 4*/ MatMultAdd_MPISBAIJ,
1448        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1449        MatMultAdd_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        MatSOR_MPISBAIJ,
1457        MatTranspose_MPISBAIJ,
1458 /*15*/ MatGetInfo_MPISBAIJ,
1459        MatEqual_MPISBAIJ,
1460        MatGetDiagonal_MPISBAIJ,
1461        MatDiagonalScale_MPISBAIJ,
1462        MatNorm_MPISBAIJ,
1463 /*20*/ MatAssemblyBegin_MPISBAIJ,
1464        MatAssemblyEnd_MPISBAIJ,
1465        MatSetOption_MPISBAIJ,
1466        MatZeroEntries_MPISBAIJ,
1467 /*24*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*34*/ MatDuplicate_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*39*/ MatAXPY_MPISBAIJ,
1483        MatGetSubMatrices_MPISBAIJ,
1484        MatIncreaseOverlap_MPISBAIJ,
1485        MatGetValues_MPISBAIJ,
1486        MatCopy_MPISBAIJ,
1487 /*44*/ 0,
1488        MatScale_MPISBAIJ,
1489        0,
1490        0,
1491        0,
1492 /*49*/ MatSetBlockSize_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*54*/ 0,
1498        0,
1499        MatSetUnfactored_MPISBAIJ,
1500        0,
1501        MatSetValuesBlocked_MPISBAIJ,
1502 /*59*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*64*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*74*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*79*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatLoad_MPISBAIJ,
1527 /*84*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*89*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*94*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*99*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*104*/0,
1548        MatRealPart_MPISBAIJ,
1549        MatImaginaryPart_MPISBAIJ,
1550        MatGetRowUpperTriangular_MPISBAIJ,
1551        MatRestoreRowUpperTriangular_MPISBAIJ
1552 };
1553 
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1558 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1559 {
1560   PetscFunctionBegin;
1561   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1562   *iscopy = PETSC_FALSE;
1563   PetscFunctionReturn(0);
1564 }
1565 EXTERN_C_END
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1570 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1571 {
1572   Mat_MPISBAIJ   *b;
1573   PetscErrorCode ierr;
1574   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1575 
1576   PetscFunctionBegin;
1577   if (bs < 0){
1578     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1579       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1580     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1581     bs   = PetscAbs(bs);
1582   }
1583   if ((d_nnz || o_nnz) && newbs != bs) {
1584     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1585   }
1586   bs = newbs;
1587 
1588   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1589   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1590   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1591   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1592 
1593   B->rmap->bs = B->cmap->bs = bs;
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596 
1597   if (d_nnz) {
1598     for (i=0; i<B->rmap->n/bs; i++) {
1599       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1600     }
1601   }
1602   if (o_nnz) {
1603     for (i=0; i<B->rmap->n/bs; i++) {
1604       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1605     }
1606   }
1607 
1608   b   = (Mat_MPISBAIJ*)B->data;
1609   mbs = B->rmap->n/bs;
1610   Mbs = B->rmap->N/bs;
1611   if (mbs*bs != B->rmap->n) {
1612     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1613   }
1614 
1615   B->rmap->bs  = bs;
1616   b->bs2 = bs*bs;
1617   b->mbs = mbs;
1618   b->nbs = mbs;
1619   b->Mbs = Mbs;
1620   b->Nbs = Mbs;
1621 
1622   for (i=0; i<=b->size; i++) {
1623     b->rangebs[i] = B->rmap->range[i]/bs;
1624   }
1625   b->rstartbs = B->rmap->rstart/bs;
1626   b->rendbs   = B->rmap->rend/bs;
1627 
1628   b->cstartbs = B->cmap->rstart/bs;
1629   b->cendbs   = B->cmap->rend/bs;
1630 
1631   if (!B->preallocated) {
1632     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1633     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1634     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1635     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1638     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1640     /* build cache for off array entries formed */
1641     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1642   }
1643 
1644   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1645   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1646   B->preallocated = PETSC_TRUE;
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 EXTERN_C_BEGIN
1652 #if defined(PETSC_HAVE_MUMPS)
1653 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1654 #endif
1655 #if defined(PETSC_HAVE_SPOOLES)
1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1657 #endif
1658 #if defined(PETSC_HAVE_PASTIX)
1659 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1660 #endif
1661 EXTERN_C_END
1662 
1663 /*MC
1664    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1665    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1666 
1667    Options Database Keys:
1668 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1669 
1670   Level: beginner
1671 
1672 .seealso: MatCreateMPISBAIJ
1673 M*/
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatCreate_MPISBAIJ"
1678 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1679 {
1680   Mat_MPISBAIJ   *b;
1681   PetscErrorCode ierr;
1682   PetscTruth     flg;
1683 
1684   PetscFunctionBegin;
1685 
1686   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1687   B->data = (void*)b;
1688   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1689 
1690   B->ops->destroy    = MatDestroy_MPISBAIJ;
1691   B->ops->view       = MatView_MPISBAIJ;
1692   B->mapping         = 0;
1693   B->assembled       = PETSC_FALSE;
1694 
1695   B->insertmode = NOT_SET_VALUES;
1696   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1697   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1698 
1699   /* build local table of row and column ownerships */
1700   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1701 
1702   /* build cache for off array entries formed */
1703   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1704   b->donotstash  = PETSC_FALSE;
1705   b->colmap      = PETSC_NULL;
1706   b->garray      = PETSC_NULL;
1707   b->roworiented = PETSC_TRUE;
1708 
1709   /* stuff used in block assembly */
1710   b->barray       = 0;
1711 
1712   /* stuff used for matrix vector multiply */
1713   b->lvec         = 0;
1714   b->Mvctx        = 0;
1715   b->slvec0       = 0;
1716   b->slvec0b      = 0;
1717   b->slvec1       = 0;
1718   b->slvec1a      = 0;
1719   b->slvec1b      = 0;
1720   b->sMvctx       = 0;
1721 
1722   /* stuff for MatGetRow() */
1723   b->rowindices   = 0;
1724   b->rowvalues    = 0;
1725   b->getrowactive = PETSC_FALSE;
1726 
1727   /* hash table stuff */
1728   b->ht           = 0;
1729   b->hd           = 0;
1730   b->ht_size      = 0;
1731   b->ht_flag      = PETSC_FALSE;
1732   b->ht_fact      = 0;
1733   b->ht_total_ct  = 0;
1734   b->ht_insert_ct = 0;
1735 
1736   b->in_loc       = 0;
1737   b->v_loc        = 0;
1738   b->n_loc        = 0;
1739   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1740     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1741     if (flg) {
1742       PetscReal fact = 1.39;
1743       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1744       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1745       if (fact <= 1.0) fact = 1.39;
1746       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1747       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1748     }
1749   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1750 
1751 #if defined(PETSC_HAVE_PASTIX)
1752   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1753 					   "MatGetFactor_mpisbaij_pastix",
1754 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1755 #endif
1756 #if defined(PETSC_HAVE_MUMPS)
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1758                                      "MatGetFactor_mpisbaij_mumps",
1759                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1760 #endif
1761 #if defined(PETSC_HAVE_SPOOLES)
1762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1763                                      "MatGetFactor_mpisbaij_spooles",
1764                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1765 #endif
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1767                                      "MatStoreValues_MPISBAIJ",
1768                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1770                                      "MatRetrieveValues_MPISBAIJ",
1771                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1773                                      "MatGetDiagonalBlock_MPISBAIJ",
1774                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1776                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1777                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1778   B->symmetric                  = PETSC_TRUE;
1779   B->structurally_symmetric     = PETSC_TRUE;
1780   B->symmetric_set              = PETSC_TRUE;
1781   B->structurally_symmetric_set = PETSC_TRUE;
1782   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 EXTERN_C_END
1786 
1787 /*MC
1788    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1789 
1790    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1791    and MATMPISBAIJ otherwise.
1792 
1793    Options Database Keys:
1794 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1795 
1796   Level: beginner
1797 
1798 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1799 M*/
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatCreate_SBAIJ"
1804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1805 {
1806   PetscErrorCode ierr;
1807   PetscMPIInt    size;
1808 
1809   PetscFunctionBegin;
1810   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1811   if (size == 1) {
1812     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1822 /*@C
1823    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on Mat
1829 
1830    Input Parameters:
1831 +  A - the matrix
1832 .  bs   - size of block
1833 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1834            submatrix  (same for all local rows)
1835 .  d_nnz - array containing the number of block nonzeros in the various block rows
1836            in the upper triangular and diagonal part of the diagonal portion of the
1837            local submatrix (possibly different for each block row) or PETSC_NULL.
1838            You must leave room for the diagonal entry even if it is zero.
1839 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1840            submatrix (same for all local rows).
1841 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1842            off-diagonal portion of the local submatrix (possibly different for
1843            each block row) or PETSC_NULL.
1844 
1845 
1846    Options Database Keys:
1847 +   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 -   -mat_block_size - size of the blocks to use
1850 
1851    Notes:
1852 
1853    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1854    then it must be used on all processors that share the object for that argument.
1855 
1856    If the *_nnz parameter is given then the *_nz parameter is ignored
1857 
1858    Storage Information:
1859    For a square global matrix we define each processor's diagonal portion
1860    to be its local rows and the corresponding columns (a square submatrix);
1861    each processor's off-diagonal portion encompasses the remainder of the
1862    local matrix (a rectangular submatrix).
1863 
1864    The user can specify preallocated storage for the diagonal part of
1865    the local submatrix with either d_nz or d_nnz (not both).  Set
1866    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1867    memory allocation.  Likewise, specify preallocated storage for the
1868    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1869 
1870    You can call MatGetInfo() to get information on how effective the preallocation was;
1871    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1872    You can also run with the option -info and look for messages with the string
1873    malloc in them to see if additional memory allocation was needed.
1874 
1875    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1876    the figure below we depict these three local rows and all columns (0-11).
1877 
1878 .vb
1879            0 1 2 3 4 5 6 7 8 9 10 11
1880           -------------------
1881    row 3  |  o o o d d d o o o o o o
1882    row 4  |  o o o d d d o o o o o o
1883    row 5  |  o o o d d d o o o o o o
1884           -------------------
1885 .ve
1886 
1887    Thus, any entries in the d locations are stored in the d (diagonal)
1888    submatrix, and any entries in the o locations are stored in the
1889    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1890    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1891 
1892    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1893    plus the diagonal part of the d matrix,
1894    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1895    In general, for PDE problems in which most nonzeros are near the diagonal,
1896    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1897    or you will get TERRIBLE performance; see the users' manual chapter on
1898    matrices.
1899 
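   For example, a minimal call sequence (a sketch; comm, m, n, M and N are
   assumed to be provided by the caller, and the d_nz/o_nz values are
   illustrative):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve
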
1900    Level: intermediate
1901 
1902 .keywords: matrix, block, aij, compressed row, sparse, parallel
1903 
1904 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1905 @*/
1906 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1907 {
1908   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1909 
1910   PetscFunctionBegin;
1911   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1912   if (f) {
1913     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCreateMPISBAIJ"
1920 /*@C
1921    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1922    (block compressed row).  For good matrix assembly performance
1923    the user should preallocate the matrix storage by setting the parameters
1924    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1925    performance can be increased by more than a factor of 50.
1926 
1927    Collective on MPI_Comm
1928 
1929    Input Parameters:
1930 +  comm - MPI communicator
1931 .  bs   - size of block
1932 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1933            This value should be the same as the local size used in creating the
1934            y vector for the matrix-vector product y = Ax.
1935 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1936            This value should be the same as the local size used in creating the
1937            x vector for the matrix-vector product y = Ax.
1938 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1939 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1940 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1941            submatrix  (same for all local rows)
1942 .  d_nnz - array containing the number of block nonzeros in the various block rows
1943            in the upper triangular portion of the diagonal portion of the local
1944            submatrix (possibly different for each block row) or PETSC_NULL.
1945            You must leave room for the diagonal entry even if it is zero.
1946 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1947            submatrix (same for all local rows).
1948 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1949            off-diagonal portion of the local submatrix (possibly different for
1950            each block row) or PETSC_NULL.
1951 
1952    Output Parameter:
1953 .  A - the matrix
1954 
1955    Options Database Keys:
1956 +   -mat_no_unroll - uses code that does not unroll the loops in the
1957                      block calculations (much slower)
1958 .   -mat_block_size - size of the blocks to use
1959 -   -mat_mpi - use the parallel matrix data structures even on one processor
1960                (defaults to using SeqSBAIJ format on one processor)
1961 
1962    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1963    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1964    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1965 
1966    Notes:
1967    The number of rows and columns must be divisible by blocksize.
1968    This matrix type does not support complex Hermitian operations.
1969 
1970    The user MUST specify either the local or global matrix dimensions
1971    (possibly both).
1972 
1973    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1974    then it must be used on all processors that share the object for that argument.
1975 
1976    If the *_nnz parameter is given then the *_nz parameter is ignored
1977 
1978    Storage Information:
1979    For a square global matrix we define each processor's diagonal portion
1980    to be its local rows and the corresponding columns (a square submatrix);
1981    each processor's off-diagonal portion encompasses the remainder of the
1982    local matrix (a rectangular submatrix).
1983 
1984    The user can specify preallocated storage for the diagonal part of
1985    the local submatrix with either d_nz or d_nnz (not both).  Set
1986    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1987    memory allocation.  Likewise, specify preallocated storage for the
1988    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1989 
1990    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1991    the figure below we depict these three local rows and all columns (0-11).
1992 
1993 .vb
1994            0 1 2 3 4 5 6 7 8 9 10 11
1995           -------------------
1996    row 3  |  o o o d d d o o o o o o
1997    row 4  |  o o o d d d o o o o o o
1998    row 5  |  o o o d d d o o o o o o
1999           -------------------
2000 .ve
2001 
2002    Thus, any entries in the d locations are stored in the d (diagonal)
2003    submatrix, and any entries in the o locations are stored in the
2004    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2005    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2006 
2007    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2008    plus the diagonal part of the d matrix,
2009    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2010    In general, for PDE problems in which most nonzeros are near the diagonal,
2011    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2012    or you will get TERRIBLE performance; see the users' manual chapter on
2013    matrices.
2014 
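   For example, a minimal sketch that creates a block-size-2 matrix of
   global size M by N (the d_nz and o_nz values are illustrative):

.vb
   Mat A;
   MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
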
2015    Level: intermediate
2016 
2017 .keywords: matrix, block, aij, compressed row, sparse, parallel
2018 
2019 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2020 @*/
2021 
2022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2023 {
2024   PetscErrorCode ierr;
2025   PetscMPIInt    size;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2029   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2031   if (size > 1) {
2032     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2033     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2034   } else {
2035     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2036     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2037   }
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2045 {
2046   Mat            mat;
2047   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2048   PetscErrorCode ierr;
2049   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2050   PetscScalar    *array;
2051 
2052   PetscFunctionBegin;
2053   *newmat       = 0;
2054   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2055   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2056   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2057   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2058   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2059   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2060 
2061   mat->factor       = matin->factor;
2062   mat->preallocated = PETSC_TRUE;
2063   mat->assembled    = PETSC_TRUE;
2064   mat->insertmode   = NOT_SET_VALUES;
2065 
2066   a = (Mat_MPISBAIJ*)mat->data;
2067   a->bs2   = oldmat->bs2;
2068   a->mbs   = oldmat->mbs;
2069   a->nbs   = oldmat->nbs;
2070   a->Mbs   = oldmat->Mbs;
2071   a->Nbs   = oldmat->Nbs;
2072 
2073 
2074   a->size         = oldmat->size;
2075   a->rank         = oldmat->rank;
2076   a->donotstash   = oldmat->donotstash;
2077   a->roworiented  = oldmat->roworiented;
2078   a->rowindices   = 0;
2079   a->rowvalues    = 0;
2080   a->getrowactive = PETSC_FALSE;
2081   a->barray       = 0;
2082   a->rstartbs    = oldmat->rstartbs;
2083   a->rendbs      = oldmat->rendbs;
2084   a->cstartbs    = oldmat->cstartbs;
2085   a->cendbs      = oldmat->cendbs;
2086 
2087   /* hash table stuff */
2088   a->ht           = 0;
2089   a->hd           = 0;
2090   a->ht_size      = 0;
2091   a->ht_flag      = oldmat->ht_flag;
2092   a->ht_fact      = oldmat->ht_fact;
2093   a->ht_total_ct  = 0;
2094   a->ht_insert_ct = 0;
2095 
2096   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2097   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2098   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2099   if (oldmat->colmap) {
2100 #if defined (PETSC_USE_CTABLE)
2101     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2102 #else
2103     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2104     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2105     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2106 #endif
2107   } else a->colmap = 0;
2108 
2109   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2110     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2111     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2112     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2113   } else a->garray = 0;
2114 
2115   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2116   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2117   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2119 
2120   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2122   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2124 
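  /* slvec1a/slvec1b (and slvec0b) are views into the storage of slvec1
     (respectively slvec0): the first bs*mbs entries form the local part and
     the remaining nt-bs*mbs entries the ghost part, so no new arrays are
     allocated below */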
2125   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2126   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2127   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2128   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2129   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2130   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2131   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2132   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2138 
2139   /* VecScatterCopy() for sMvctx is not implemented yet, so simply take a reference to the old scatter: */
2140   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2141   a->sMvctx = oldmat->sMvctx;
2142   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2143 
2144   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2146   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2148   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2149   *newmat = mat;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #include "petscsys.h"
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatLoad_MPISBAIJ"
2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2158 {
2159   Mat            A;
2160   PetscErrorCode ierr;
2161   PetscInt       i,nz,j,rstart,rend;
2162   PetscScalar    *vals,*buf;
2163   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2164   MPI_Status     status;
2165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2166   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2167   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2168   PetscInt       bs=1,Mbs,mbs,extra_rows;
2169   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2170   PetscInt       dcount,kmax,k,nzcount,tmp;
2171   int            fd;
2172 
2173   PetscFunctionBegin;
2174   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2175     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2176   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2177 
2178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   if (!rank) {
2181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2182     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2183     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2184     if (header[3] < 0) {
2185       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2186     }
2187   }
2188 
2189   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2190   M = header[1]; N = header[2];
2191 
2192   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2193 
2194   /*
2195      This code adds extra rows to make sure the number of rows is
2196      divisible by the blocksize
2197   */
2198   Mbs        = M/bs;
2199   extra_rows = bs - M + bs*(Mbs);
2200   if (extra_rows == bs) extra_rows = 0;
2201   else                  Mbs++;
2202   if (extra_rows && !rank) {
2203     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2204   }
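  /* worked example: M = 10, bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2,
     so Mbs becomes 3 and the matrix is padded to 12 rows; each padded row is
     later given a single unit diagonal entry */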
2205 
2206   /* determine ownership of all rows */
2207   mbs        = Mbs/size + ((Mbs % size) > rank);
2208   m          = mbs*bs;
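  /* the remainder blocks go to the low ranks: e.g. Mbs = 10 block rows on
     size = 4 processes gives local block counts 3,3,2,2 */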
2209   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2210   mmbs       = PetscMPIIntCast(mbs);
2211   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2212   rowners[0] = 0;
2213   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2214   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2215   rstart = rowners[rank];
2216   rend   = rowners[rank+1];
2217 
2218   /* distribute row lengths to all processors */
2219   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2220   if (!rank) {
2221     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2222     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2223     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2224     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2225     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2226     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2227     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2228   } else {
2229     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230   }
2231 
2232   if (!rank) {   /* procs[0] */
2233     /* calculate the number of nonzeros on each processor */
2234     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2235     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2236     for (i=0; i<size; i++) {
2237       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2238         procsnz[i] += rowlengths[j];
2239       }
2240     }
2241     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2242 
2243     /* determine max buffer needed and allocate it */
2244     maxnz = 0;
2245     for (i=0; i<size; i++) {
2246       maxnz = PetscMax(maxnz,procsnz[i]);
2247     }
2248     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2249 
2250     /* read in my part of the matrix column indices  */
2251     nz     = procsnz[0];
2252     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2253     mycols = ibuf;
2254     if (size == 1)  nz -= extra_rows;
2255     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2256     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2257 
2258     /* read in the portion for each of the other processes (except the last) and ship it off */
2259     for (i=1; i<size-1; i++) {
2260       nz   = procsnz[i];
2261       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2262       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2263     }
2264     /* read in the stuff for the last proc */
2265     if (size != 1) {
2266       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2267       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2268       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2269       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2270     }
2271     ierr = PetscFree(cols);CHKERRQ(ierr);
2272   } else {  /* procs[i], i>0 */
2273     /* determine buffer space needed for message */
2274     nz = 0;
2275     for (i=0; i<m; i++) {
2276       nz += locrowlens[i];
2277     }
2278     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2279     mycols = ibuf;
2280     /* receive message of column indices*/
2281     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2282     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2283     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2284   }
2285 
2286   /* loop over local rows, determining number of off diagonal entries */
2287   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2288   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2289   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2290   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2291   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2292   rowcount = 0;
2293   nzcount  = 0;
2294   for (i=0; i<mbs; i++) {
2295     dcount  = 0;
2296     odcount = 0;
2297     for (j=0; j<bs; j++) {
2298       kmax = locrowlens[rowcount];
2299       for (k=0; k<kmax; k++) {
2300         tmp = mycols[nzcount++]/bs; /* block col. index */
2301         if (!mask[tmp]) {
2302           mask[tmp] = 1;
2303           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2304           else masked1[dcount++] = tmp; /* entry in diag portion */
2305         }
2306       }
2307       rowcount++;
2308     }
2309 
2310     dlens[i]  = dcount;  /* d_nzz[i] */
2311     odlens[i] = odcount; /* o_nzz[i] */
2312 
2313     /* zero out the mask elements we set */
2314     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2315     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2316   }
2317 
2318   /* create our matrix */
2319   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2320   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2321   ierr = MatSetType(A,type);CHKERRQ(ierr);
2322   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2323   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2324 
2325   if (!rank) {
2326     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2327     /* read in my part of the matrix numerical values  */
2328     nz = procsnz[0];
2329     vals = buf;
2330     mycols = ibuf;
2331     if (size == 1)  nz -= extra_rows;
2332     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2333     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2334 
2335     /* insert into matrix */
2336     jj      = rstart*bs;
2337     for (i=0; i<m; i++) {
2338       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2339       mycols += locrowlens[i];
2340       vals   += locrowlens[i];
2341       jj++;
2342     }
2343 
2344     /* read in other processors (except the last one) and ship out */
2345     for (i=1; i<size-1; i++) {
2346       nz   = procsnz[i];
2347       vals = buf;
2348       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2349       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2350     }
2351     /* the last proc */
2352     if (size != 1){
2353       nz   = procsnz[size-1] - extra_rows;
2354       vals = buf;
2355       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2356       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2357       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2358     }
2359     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2360 
2361   } else {
2362     /* receive numeric values */
2363     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2364 
2365     /* receive message of values*/
2366     vals   = buf;
2367     mycols = ibuf;
2368     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2369     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2370     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2371 
2372     /* insert into matrix */
2373     jj      = rstart*bs;
2374     for (i=0; i<m; i++) {
2375       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2376       mycols += locrowlens[i];
2377       vals   += locrowlens[i];
2378       jj++;
2379     }
2380   }
2381 
2382   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2383   ierr = PetscFree(buf);CHKERRQ(ierr);
2384   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2385   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2386   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2387   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   *newmat = A;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2396 /*XXXXX@
2397    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2398 
2399    Input Parameters:
2400 .  mat  - the matrix
2401 .  fact - factor
2402 
2403    Collective on Mat
2404 
2405    Level: advanced
2406 
2407   Notes:
2408    This can also be set by the command line option: -mat_use_hash_table fact
2409 
2410 .keywords: matrix, hashtable, factor, HT
2411 
2412 .seealso: MatSetOption()
2413 @XXXXX*/
2414 
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2418 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2419 {
2420   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2421   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2422   PetscReal      atmp;
2423   PetscReal      *work,*svalues,*rvalues;
2424   PetscErrorCode ierr;
2425   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2426   PetscMPIInt    rank,size;
2427   PetscInt       *rowners_bs,dest,count,source;
2428   PetscScalar    *va;
2429   MatScalar      *ba;
2430   MPI_Status     stat;
2431 
2432   PetscFunctionBegin;
2433   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2434   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2436 
2437   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2438   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2439 
2440   bs   = A->rmap->bs;
2441   mbs  = a->mbs;
2442   Mbs  = a->Mbs;
2443   ba   = b->a;
2444   bi   = b->i;
2445   bj   = b->j;
2446 
2447   /* find ownerships */
2448   rowners_bs = A->rmap->range;
2449 
2450   /* each proc creates an array to be distributed */
2451   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2452   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2453 
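  /* only the upper triangle is stored, so an entry of B in global column col
     also contributes to row col, which is owned by a process of higher rank;
     those contributions are accumulated in work[] and shipped to their
     owners below */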
2454   /* row_max for B */
2455   if (rank != size-1){
2456     for (i=0; i<mbs; i++) {
2457       ncols = bi[1] - bi[0]; bi++;
2458       brow  = bs*i;
2459       for (j=0; j<ncols; j++){
2460         bcol = bs*(*bj);
2461         for (kcol=0; kcol<bs; kcol++){
2462           col = bcol + kcol;                 /* local col index */
2463           col += rowners_bs[rank+1];      /* global col index */
2464           for (krow=0; krow<bs; krow++){
2465             atmp = PetscAbsScalar(*ba); ba++;
2466             row = brow + krow;    /* local row index */
2467             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2468             if (work[col] < atmp) work[col] = atmp;
2469           }
2470         }
2471         bj++;
2472       }
2473     }
2474 
2475     /* send values to its owners */
2476     for (dest=rank+1; dest<size; dest++){
2477       svalues = work + rowners_bs[dest];
2478       count   = rowners_bs[dest+1]-rowners_bs[dest];
2479       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2480     }
2481   }
2482 
2483   /* receive values */
2484   if (rank){
2485     rvalues = work;
2486     count   = rowners_bs[rank+1]-rowners_bs[rank];
2487     for (source=0; source<rank; source++){
2488       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2489       /* process values */
2490       for (i=0; i<count; i++){
2491         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2492       }
2493     }
2494   }
2495 
2496   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSOR_MPISBAIJ"
2503 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2504 {
2505   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2506   PetscErrorCode ierr;
2507   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2508   PetscScalar    *x,*b,*ptr,*from;
2509   Vec            bb1;
2510 
2511   PetscFunctionBegin;
2512   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2513   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2514 
2515   if (flag == SOR_APPLY_UPPER) {
2516     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2517     PetscFunctionReturn(0);
2518   }
2519 
2520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2521     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525 
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
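      /* each sweep forms bb1 = bb - B^T*x - B*x: slvec0 = [-x; -B^T*x] is
         scatter-added into slvec1 = [bb; 0], landing the -B^T*x contributions
         in slvec1a and the -x ghost values in slvec1b; the multadd below then
         completes bb1 before the local symmetric sweep */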
2527     while (its--){
2528 
2529       /* lower triangular part: slvec0b = - B^T*xx */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2531 
2532       /* copy xx into slvec0a */
2533       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2536       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537 
2538       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2539 
2540       /* copy bb into slvec1a */
2541       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545 
2546       /* set slvec1b = 0 */
2547       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2548 
2549       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2551       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2552       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2553 
2554       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - B^T*x - B*x */
2555       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2556 
2557       /* local diagonal sweep */
2558       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2561   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2563   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2564     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2565   } else if (flag & SOR_EISENSTAT) {
2566     Vec               xx1;
2567     PetscTruth        hasop;
2568     const PetscScalar *diag;
2569     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2570     PetscInt          i,n;
2571 
2572     if (!mat->xx1) {
2573       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2574       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2575     }
2576     xx1 = mat->xx1;
2577     bb1 = mat->bb1;
2578 
2579     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2580 
2581     if (!mat->diag) {
2582       /* this is wrong for same matrix with new nonzero values */
2583       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2584       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2585     }
2586     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2587 
2588     if (hasop) {
2589       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2590       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2591     } else {
2592       /*
2593           These two lines are replaced by code that may be a bit faster for a good compiler
2594       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2595       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2596       */
2597       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2598       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2599       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2600       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2601       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2602       if (omega == 1.0) {
2603 	for (i=0; i<n; i++) {
2604 	  sl[i] = b[i] - diag[i]*x[i];
2605 	}
2606         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2607       } else {
2608 	for (i=0; i<n; i++) {
2609 	  sl[i] = b[i] + scale*diag[i]*x[i];
2610 	}
2611         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2612       }
2613       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2614       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2615       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2616       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2617     }
2618 
2619     /* multiply off-diagonal portion of matrix */
2620     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2621     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2622     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2623     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2624     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2625     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2626     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2627     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2628     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2629     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2630 
2631     /* local sweep */
2632     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2633     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2634   } else {
2635     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2636   }
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 #undef __FUNCT__
2641 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2642 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2643 {
2644   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2645   PetscErrorCode ierr;
2646   Vec            lvec1,bb1;
2647 
2648   PetscFunctionBegin;
2649   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2650   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2651 
2652   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2653     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2654       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2655       its--;
2656     }
2657 
2658     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2659     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
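      /* same update as in MatSOR_MPISBAIJ, but with the ordinary scatter Mvctx
         used twice: the -B^T*x contributions return through a reverse
         scatter-add into bb1, while B*x is applied with the forward-scattered
         (and negated) ghost values */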
2660     while (its--){
2661       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2662 
2663       /* lower diagonal part: bb1 = bb - B^T*xx */
2664       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2665       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2666 
2667       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2668       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2669       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670 
2671       /* upper diagonal part: bb1 = bb1 - B*x */
2672       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2673       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2674 
2675       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2676 
2677       /* diagonal sweep */
2678       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2679     }
2680     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2681     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2682   } else {
2683     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2684   }
2685   PetscFunctionReturn(0);
2686 }
2687 
2688