xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 22982a5f1e5ef536d7b0e4ffb77c4c6450d5b280)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
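/*
   Usage sketch (illustrative only, not part of this file): the pair above
   lets an application checkpoint the numerical values of a matrix whose
   nonzero pattern is frozen and restore them later, e.g. around an
   in-place modification of the values.  Assumes new nonzero insertions
   were disabled first:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... overwrite the values of mat ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/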
52 
53 
54 #define CHUNKSIZE  10
55 
56 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
57 { \
58  \
59     brow = row/bs;  \
60     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
61     rmax = aimax[brow]; nrow = ailen[brow]; \
62       bcol = col/bs; \
63       ridx = row % bs; cidx = col % bs; \
64       low = 0; high = nrow; \
65       while (high-low > 3) { \
66         t = (low+high)/2; \
67         if (rp[t] > bcol) high = t; \
68         else              low  = t; \
69       } \
70       for (_i=low; _i<high; _i++) { \
71         if (rp[_i] > bcol) break; \
72         if (rp[_i] == bcol) { \
73           bap  = ap +  bs2*_i + bs*cidx + ridx; \
74           if (addv == ADD_VALUES) *bap += value;  \
75           else                    *bap  = value;  \
76           goto a_noinsert; \
77         } \
78       } \
79       if (a->nonew == 1) goto a_noinsert; \
80       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
81       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
82       N = nrow++ - 1;  \
83       /* shift up all the later entries in this row */ \
84       for (ii=N; ii>=_i; ii--) { \
85         rp[ii+1] = rp[ii]; \
86         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
87       } \
88       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
89       rp[_i]                      = bcol;  \
90       ap[bs2*_i + bs*cidx + ridx] = value;  \
91       a_noinsert:; \
92     ailen[brow] = nrow; \
93 }
94 
95 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
96 { \
97     brow = row/bs;  \
98     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
99     rmax = bimax[brow]; nrow = bilen[brow]; \
100       bcol = col/bs; \
101       ridx = row % bs; cidx = col % bs; \
102       low = 0; high = nrow; \
103       while (high-low > 3) { \
104         t = (low+high)/2; \
105         if (rp[t] > bcol) high = t; \
106         else              low  = t; \
107       } \
108       for (_i=low; _i<high; _i++) { \
109         if (rp[_i] > bcol) break; \
110         if (rp[_i] == bcol) { \
111           bap  = ap +  bs2*_i + bs*cidx + ridx; \
112           if (addv == ADD_VALUES) *bap += value;  \
113           else                    *bap  = value;  \
114           goto b_noinsert; \
115         } \
116       } \
117       if (b->nonew == 1) goto b_noinsert; \
118       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
119       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
120       N = nrow++ - 1;  \
121       /* shift up all the later entries in this row */ \
122       for (ii=N; ii>=_i; ii--) { \
123         rp[ii+1] = rp[ii]; \
124         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
125       } \
126       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
127       rp[_i]                      = bcol;  \
128       ap[bs2*_i + bs*cidx + ridx] = value;  \
129       b_noinsert:; \
130     bilen[brow] = nrow; \
131 }
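/*
   Both macros above locate block column bcol inside the sorted row segment
   rp[0..nrow-1] by bisecting until the window is small and then scanning
   linearly.  A minimal standalone sketch of the same search (hypothetical
   helper, not used by this file):

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= bcol) break;
       }
       return i;
     }

   The entry matches when i < nrow and rp[i] == bcol; otherwise i is the
   position at which a new block must be inserted (the macros then shift
   the tail of the row up by one block).
*/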
132 
133 /* Only add/insert a(i,j) with i<=j (blocks).
134    Any a(i,j) with i>j input by user is ignored.
135 */
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetValues_MPISBAIJ"
138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
139 {
140   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
141   MatScalar      value;
142   PetscTruth     roworiented = baij->roworiented;
143   PetscErrorCode ierr;
144   PetscInt       i,j,row,col;
145   PetscInt       rstart_orig=mat->rmap->rstart;
146   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
147   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
148 
149   /* Some variables required by the macros above */
150   Mat            A = baij->A;
151   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
152   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
153   MatScalar      *aa=a->a;
154 
155   Mat            B = baij->B;
156   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
157   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
158   MatScalar     *ba=b->a;
159 
160   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
161   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
162   MatScalar     *ap,*bap;
163 
164   /* for stash */
165   PetscInt      n_loc, *in_loc = PETSC_NULL;
166   MatScalar     *v_loc = PETSC_NULL;
167 
168   PetscFunctionBegin;
169   if (v) PetscValidScalarPointer(v,6);
170   if (!baij->donotstash){
171     if (n > baij->n_loc) {
172       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
176       baij->n_loc = n;
177     }
178     in_loc = baij->in_loc;
179     v_loc  = baij->v_loc;
180   }
181 
182   for (i=0; i<m; i++) {
183     if (im[i] < 0) continue;
184 #if defined(PETSC_USE_DEBUG)
185     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
186 #endif
187     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
188       row = im[i] - rstart_orig;              /* local row index */
189       for (j=0; j<n; j++) {
190         if (im[i]/bs > in[j]/bs){
191           if (a->ignore_ltriangular){
192             continue;    /* ignore lower triangular blocks */
193           } else {
194             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore such values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
195           }
196         }
197         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
198           col = in[j] - cstart_orig;          /* local col index */
199           brow = row/bs; bcol = col/bs;
200           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
201           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
202           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
203           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
204         } else if (in[j] < 0) continue;
205 #if defined(PETSC_USE_DEBUG)
206         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
207 #endif
208         else {  /* off-diag entry (B) */
209           if (mat->was_assembled) {
210             if (!baij->colmap) {
211               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
212             }
213 #if defined (PETSC_USE_CTABLE)
214             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
215             col  = col - 1;
216 #else
217             col = baij->colmap[in[j]/bs] - 1;
218 #endif
219             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
220               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
221               col =  in[j];
222               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
223               B = baij->B;
224               b = (Mat_SeqBAIJ*)(B)->data;
225               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226               ba=b->a;
227             } else col += in[j]%bs;
228           } else col = in[j];
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (!baij->donotstash) {
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
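/*
   Usage sketch (illustrative only; sizes and values are made up): SBAIJ
   stores only the upper triangle in blocks, so callers either set entries
   with block row <= block column only, or first enable
   MAT_IGNORE_LOWER_TRIANGULAR so that lower-triangular input is dropped
   instead of raising the error in the routine above:

     Mat         mat;
     PetscInt    row = 0,cols[2] = {0,3};
     PetscScalar vals[2] = {4.0,-1.0};

     ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,12,12);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(mat,1,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/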
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscTruth      roworiented = baij->roworiented;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the values are already contiguous and a copy is not required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++  = *value++;
300             }
301           }
302           barray -=bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend){
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         }
309         else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined (PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined (PETSC_USE_CTABLE)
330 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col =  in[j];
338             }
339           }
340           else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
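/*
   Usage sketch (illustrative only): MatSetValuesBlocked() addresses the
   matrix by block indices and consumes a dense bs x bs block of values,
   here for bs = 2 and an upper-triangular block entry (block row 0,
   block column 1), laid out row-oriented, the default:

     PetscInt    brow = 0,bcol = 1;
     PetscScalar block[4] = {2.0,-1.0,
                             0.0, 2.0};

     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/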
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
364   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend){
376           col = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined (PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data --;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else {
396       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
397     }
398   }
399  PetscFunctionReturn(0);
400 }
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "MatNorm_MPISBAIJ"
404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
405 {
406   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
407   PetscErrorCode ierr;
408   PetscReal      sum[2],*lnorm2;
409 
410   PetscFunctionBegin;
411   if (baij->size == 1) {
412     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
413   } else {
414     if (type == NORM_FROBENIUS) {
415       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
416       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
418       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
420       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
421       *norm = sqrt(sum[0] + 2*sum[1]);
422       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
423     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
424       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
425       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
426       PetscReal    *rsum,*rsum2,vabs;
427       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
428       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
429       MatScalar    *v;
430 
431       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
432       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
433       /* Amat */
434       v = amat->a; jj = amat->j;
435       for (brow=0; brow<mbs; brow++) {
436         grow = bs*(rstart + brow);
437         nz = amat->i[brow+1] - amat->i[brow];
438         for (bcol=0; bcol<nz; bcol++){
439           gcol = bs*(rstart + *jj); jj++;
440           for (col=0; col<bs; col++){
441             for (row=0; row<bs; row++){
442               vabs = PetscAbsScalar(*v); v++;
443               rsum[gcol+col] += vabs;
444               /* non-diagonal block */
445               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
446             }
447           }
448         }
449       }
450       /* Bmat */
451       v = bmat->a; jj = bmat->j;
452       for (brow=0; brow<mbs; brow++) {
453         grow = bs*(rstart + brow);
454         nz = bmat->i[brow+1] - bmat->i[brow];
455         for (bcol=0; bcol<nz; bcol++){
456           gcol = bs*garray[*jj]; jj++;
457           for (col=0; col<bs; col++){
458             for (row=0; row<bs; row++){
459               vabs = PetscAbsScalar(*v); v++;
460               rsum[gcol+col] += vabs;
461               rsum[grow+row] += vabs;
462             }
463           }
464         }
465       }
466       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
467       *norm = 0.0;
468       for (col=0; col<mat->cmap->N; col++) {
469         if (rsum2[col] > *norm) *norm = rsum2[col];
470       }
471       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
472     } else {
473       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
474     }
475   }
476   PetscFunctionReturn(0);
477 }
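/*
   Note on the Frobenius branch above: every process stores its diagonal
   block in A and its strictly off-diagonal block in B exactly once, yet B
   also stands for B^T in the symmetric matrix.  The sequential MatNorm()
   of A already accounts for the implicit lower triangle of the diagonal
   block, so the global norm is assembled as

     ||M||_F = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 )

   which is the  sqrt(sum[0] + 2*sum[1])  computed above.
*/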
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
481 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
484   PetscErrorCode ierr;
485   PetscInt       nstash,reallocs;
486   InsertMode     addv;
487 
488   PetscFunctionBegin;
489   if (baij->donotstash) {
490     PetscFunctionReturn(0);
491   }
492 
493   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
494   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
495   if (addv == (ADD_VALUES|INSERT_VALUES)) {
496     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
497   }
498   mat->insertmode = addv; /* in case this processor had no cache */
499 
500   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
501   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
502   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
503   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
511 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
512 {
513   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
514   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
515   PetscErrorCode ierr;
516   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
517   PetscInt       *row,*col;
518   PetscTruth     other_disassembled;
519   PetscMPIInt    n;
520   PetscTruth     r1,r2,r3;
521   MatScalar      *val;
522   InsertMode     addv = mat->insertmode;
523 
524   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
525   PetscFunctionBegin;
526 
527   if (!baij->donotstash) {
528     while (1) {
529       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
530       if (!flg) break;
531 
532       for (i=0; i<n;) {
533         /* Now identify the consecutive vals belonging to the same row */
534         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
535         if (j < n) ncols = j-i;
536         else       ncols = n-i;
537         /* Now assemble all these values with a single function call */
538         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
539         i = j;
540       }
541     }
542     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
543     /* Now process the block-stash. Since the values are stashed column-oriented,
544        clear the roworiented flags, and restore the original flags after the
545        MatSetValuesBlocked() calls */
546     r1 = baij->roworiented;
547     r2 = a->roworiented;
548     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
549     baij->roworiented = PETSC_FALSE;
550     a->roworiented    = PETSC_FALSE;
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
552     while (1) {
553       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
554       if (!flg) break;
555 
556       for (i=0; i<n;) {
557         /* Now identify the consecutive vals belonging to the same row */
558         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566     baij->roworiented = r1;
567     a->roworiented    = r2;
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled, if so we must
575      also disassemble ourselves, in order that we may reassemble. */
576   /*
577      if nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled thus we can skip this stuff
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
591   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
592   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
593 
594   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
595   baij->rowvalues = 0;
596 
597   PetscFunctionReturn(0);
598 }
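/*
   The two routines above implement the usual deferred-communication
   assembly: MatSetValues()/MatSetValuesBlocked() stash off-process entries
   (mat->stash for point entries, mat->bstash for blocked entries),
   MatAssemblyBegin() starts the sends, and MatAssemblyEnd() drains the
   incoming messages through the local MatSetValues routines.  The caller
   always uses the pair

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   with MAT_FLUSH_ASSEMBLY instead when switching between INSERT_VALUES
   and ADD_VALUES before the final assembly.
*/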
599 
600 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
601 #undef __FUNCT__
602 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
603 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
604 {
605   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
606   PetscErrorCode    ierr;
607   PetscInt          bs = mat->rmap->bs;
608   PetscMPIInt       size = baij->size,rank = baij->rank;
609   PetscTruth        iascii,isdraw;
610   PetscViewer       sviewer;
611   PetscViewerFormat format;
612 
613   PetscFunctionBegin;
614   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
616   if (iascii) {
617     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
618     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
619       MatInfo info;
620       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
621       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
623               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
624               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
625       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
629       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
630       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
631       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
632       PetscFunctionReturn(0);
633     } else if (format == PETSC_VIEWER_ASCII_INFO) {
634       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
637       PetscFunctionReturn(0);
638     }
639   }
640 
641   if (isdraw) {
642     PetscDraw  draw;
643     PetscTruth isnull;
644     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
645     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
646   }
647 
648   if (size == 1) {
649     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
650     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
651   } else {
652     /* assemble the entire matrix onto first processor. */
653     Mat          A;
654     Mat_SeqSBAIJ *Aloc;
655     Mat_SeqBAIJ  *Bloc;
656     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
657     MatScalar    *a;
658 
659     /* Should this be the same type as mat? */
660     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
661     if (!rank) {
662       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
663     } else {
664       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
665     }
666     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
667     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
668     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
669 
670     /* copy over the A part */
671     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
672     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
673     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
674 
675     for (i=0; i<mbs; i++) {
676       rvals[0] = bs*(baij->rstartbs + i);
677       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
678       for (j=ai[i]; j<ai[i+1]; j++) {
679         col = (baij->cstartbs+aj[j])*bs;
680         for (k=0; k<bs; k++) {
681           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
682           col++; a += bs;
683         }
684       }
685     }
686     /* copy over the B part */
687     Bloc = (Mat_SeqBAIJ*)baij->B->data;
688     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
689     for (i=0; i<mbs; i++) {
690 
691       rvals[0] = bs*(baij->rstartbs + i);
692       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
693       for (j=ai[i]; j<ai[i+1]; j++) {
694         col = baij->garray[aj[j]]*bs;
695         for (k=0; k<bs; k++) {
696           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
697           col++; a += bs;
698         }
699       }
700     }
701     ierr = PetscFree(rvals);CHKERRQ(ierr);
702     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
703     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     /*
705        Everyone has to call to draw the matrix since the graphics waits are
706        synchronized across all processors that share the PetscDraw object
707     */
708     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
709     if (!rank) {
710       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
711       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
712     }
713     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
714     ierr = MatDestroy(A);CHKERRQ(ierr);
715   }
716   PetscFunctionReturn(0);
717 }
718 
719 #undef __FUNCT__
720 #define __FUNCT__ "MatView_MPISBAIJ"
721 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
722 {
723   PetscErrorCode ierr;
724   PetscTruth     iascii,isdraw,issocket,isbinary;
725 
726   PetscFunctionBegin;
727   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
731   if (iascii || isdraw || issocket || isbinary) {
732     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
733   } else {
734     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
735   }
736   PetscFunctionReturn(0);
737 }
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatDestroy_MPISBAIJ"
741 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
742 {
743   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
744   PetscErrorCode ierr;
745 
746   PetscFunctionBegin;
747 #if defined(PETSC_USE_LOG)
748   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
749 #endif
750   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
751   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
752   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
754 #if defined (PETSC_USE_CTABLE)
755   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
756 #else
757   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
758 #endif
759   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
760   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
761   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
762   if (baij->slvec0) {
763     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
764     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
765   }
766   if (baij->slvec1) {
767     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
768     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
770   }
771   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
772   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
773   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
774   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
775   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
776   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
777   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
778 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
779   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
780 #endif
781   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
784   ierr = PetscFree(baij);CHKERRQ(ierr);
785 
786   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
797 {
798   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
801   PetscScalar    *x,*from;
802 
803   PetscFunctionBegin;
804   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
805   if (nt != A->cmap->n) {
806     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
807   }
808 
809   /* diagonal part */
810   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
811   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
812 
813   /* subdiagonal part */
814   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
815 
816   /* copy x into the vec slvec0 */
817   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
818   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
819 
820   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
821   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   /* superdiagonal part */
827   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatMult_MPISBAIJ"
833 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
834 {
835   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
836   PetscErrorCode ierr;
837   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
838   PetscScalar    *x,*from;
839 
840   PetscFunctionBegin;
841   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
842   if (nt != A->cmap->n) {
843     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844   }
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
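/*
   Note on the single-scatter scheme used by the two MatMult routines
   above: with only the upper triangle stored, y = M*x needs, per process,

     y_local = A*x_local + B*x_ghost

   plus the transpose contribution B^T*x_local sent to the owners of the
   ghost columns.  slvec0 packs [x_local ; B^T*x_local]; one forward
   VecScatter (sMvctx) with ADD_VALUES then simultaneously gathers the
   ghost values into slvec1b and accumulates the transpose contributions
   into slvec1a, replacing the separate forward and reverse scatters of
   the _2comm variants below.
*/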
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) {
879     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
880   }
881   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
882   if (nt != A->rmap->N) {
883     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
884   }
885 
886   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   /* do diagonal part */
888   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
889   /* do superdiagonal part */
890   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
892   /* do subdiagonal part */
893   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
894   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
895   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
896 
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
902 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
903 {
904   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
907   PetscScalar    *x,*from,zero=0.0;
908 
909   PetscFunctionBegin;
910   /*
911   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
912   PetscSynchronizedFlush(((PetscObject)A)->comm);
913   */
914   /* diagonal part */
915   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
916   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
917 
918   /* subdiagonal part */
919   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
920 
921   /* copy x into the vec slvec0 */
922   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
923   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
924   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
925   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
926 
927   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
930 
931   /* superdiagonal part */
932   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
933 
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
939 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
940 {
941   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   /* do diagonal part */
947   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
948   /* do superdiagonal part */
949   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
950   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
951 
952   /* do subdiagonal part */
953   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
954   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
955   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
956 
957   PetscFunctionReturn(0);
958 }
959 
960 /*
961   This only works correctly for square matrices where the subblock A->A is the
962    diagonal block
963 */
964 #undef __FUNCT__
965 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
966 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
967 {
968   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
973   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatScale_MPISBAIJ"
979 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
980 {
981   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
986   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatGetRow_MPISBAIJ"
992 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
993 {
994   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
995   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
996   PetscErrorCode ierr;
997   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
998   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
999   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1000 
1001   PetscFunctionBegin;
1002   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1003   mat->getrowactive = PETSC_TRUE;
1004 
1005   if (!mat->rowvalues && (idx || v)) {
1006     /*
1007         allocate enough space to hold information from the longest row.
1008     */
1009     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1010     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1011     PetscInt     max = 1,mbs = mat->mbs,tmp;
1012     for (i=0; i<mbs; i++) {
1013       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1014       if (max < tmp) { max = tmp; }
1015     }
1016     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1017   }
1018 
1019   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1020   lrow = row - brstart;  /* local row index */
1021 
1022   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1023   if (!v)   {pvA = 0; pvB = 0;}
1024   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1025   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1026   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1027   nztot = nzA + nzB;
1028 
1029   cmap  = mat->garray;
1030   if (v  || idx) {
1031     if (nztot) {
1032       /* Sort by increasing column numbers, assuming A and B already sorted */
1033       PetscInt imark = -1;
1034       if (v) {
1035         *v = v_p = mat->rowvalues;
1036         for (i=0; i<nzB; i++) {
1037           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1038           else break;
1039         }
1040         imark = i;
1041         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1042         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1043       }
1044       if (idx) {
1045         *idx = idx_p = mat->rowindices;
1046         if (imark > -1) {
1047           for (i=0; i<imark; i++) {
1048             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1049           }
1050         } else {
1051           for (i=0; i<nzB; i++) {
1052             if (cmap[cworkB[i]/bs] < cstart)
1053               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054             else break;
1055           }
1056           imark = i;
1057         }
1058         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1059         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1060       }
1061     } else {
1062       if (idx) *idx = 0;
1063       if (v)   *v   = 0;
1064     }
1065   }
1066   *nz = nztot;
1067   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1068   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
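/*
   The merge in MatGetRow above relies on both parts returning columns in
   increasing order: entries of B whose global block column (via garray)
   falls below the diagonal block come first, then the whole A part, then
   the remaining B entries, so the caller sees one globally sorted row.
*/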
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1074 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1075 {
1076   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1077 
1078   PetscFunctionBegin;
1079   if (!baij->getrowactive) {
1080     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1081   }
1082   baij->getrowactive = PETSC_FALSE;
1083   PetscFunctionReturn(0);
1084 }
1085 
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1088 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1089 {
1090   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1091   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1092 
1093   PetscFunctionBegin;
1094   aA->getrow_utriangular = PETSC_TRUE;
1095   PetscFunctionReturn(0);
1096 }
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1099 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1100 {
1101   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1102   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1103 
1104   PetscFunctionBegin;
1105   aA->getrow_utriangular = PETSC_FALSE;
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1111 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1112 {
1113   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1118   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1124 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1131   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1137 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1138 {
1139   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1144   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1150 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1151 {
1152   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1153   Mat            A = a->A,B = a->B;
1154   PetscErrorCode ierr;
1155   PetscReal      isend[5],irecv[5];
1156 
1157   PetscFunctionBegin;
1158   info->block_size     = (PetscReal)matin->rmap->bs;
1159   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1161   isend[3] = info->memory;  isend[4] = info->mallocs;
1162   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1163   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1164   isend[3] += info->memory;  isend[4] += info->mallocs;
1165   if (flag == MAT_LOCAL) {
1166     info->nz_used      = isend[0];
1167     info->nz_allocated = isend[1];
1168     info->nz_unneeded  = isend[2];
1169     info->memory       = isend[3];
1170     info->mallocs      = isend[4];
1171   } else if (flag == MAT_GLOBAL_MAX) {
1172     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1173     info->nz_used      = irecv[0];
1174     info->nz_allocated = irecv[1];
1175     info->nz_unneeded  = irecv[2];
1176     info->memory       = irecv[3];
1177     info->mallocs      = irecv[4];
1178   } else if (flag == MAT_GLOBAL_SUM) {
1179     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1180     info->nz_used      = irecv[0];
1181     info->nz_allocated = irecv[1];
1182     info->nz_unneeded  = irecv[2];
1183     info->memory       = irecv[3];
1184     info->mallocs      = irecv[4];
1185   } else {
1186     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1187   }
1188   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1189   info->fill_ratio_needed = 0;
1190   info->factor_mallocs    = 0;
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1196 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1197 {
1198   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1199   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NEW_NONZERO_LOCATIONS:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1207   case MAT_KEEP_NONZERO_PATTERN:
1208   case MAT_NEW_NONZERO_LOCATION_ERR:
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_ROW_ORIENTED:
1213     a->roworiented = flg;
1214     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1215     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1216     break;
1217   case MAT_NEW_DIAGONALS:
1218     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1219     break;
1220   case MAT_IGNORE_OFF_PROC_ENTRIES:
1221     a->donotstash = flg;
1222     break;
1223   case MAT_USE_HASH_TABLE:
1224     a->ht_flag = flg;
1225     break;
1226   case MAT_HERMITIAN:
1227     if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1228     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1229     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1230     break;
1231   case MAT_SYMMETRIC:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   case MAT_STRUCTURALLY_SYMMETRIC:
1235     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1236     break;
1237   case MAT_SYMMETRY_ETERNAL:
1238     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1239     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1240     break;
1241   case MAT_IGNORE_LOWER_TRIANGULAR:
1242     aA->ignore_ltriangular = flg;
1243     break;
1244   case MAT_ERROR_LOWER_TRIANGULAR:
1245     aA->ignore_ltriangular = flg;
1246     break;
1247   case MAT_GETROW_UPPERTRIANGULAR:
1248     aA->getrow_utriangular = flg;
1249     break;
1250   default:
1251     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1252   }
1253   PetscFunctionReturn(0);
1254 }
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1258 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1259 {
1260   PetscErrorCode ierr;
1261   PetscFunctionBegin;
1262   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1263     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1270 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1271 {
1272   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1273   Mat            a=baij->A, b=baij->B;
1274   PetscErrorCode ierr;
1275   PetscInt       nv,m,n;
1276   PetscTruth     flg;
1277 
1278   PetscFunctionBegin;
1279   if (ll != rr){
1280     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1281     if (!flg)
1282       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1283   }
1284   if (!ll) PetscFunctionReturn(0);
1285 
1286   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1287   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1288 
1289   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1290   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1291 
1292   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293 
1294   /* left diagonalscale the off-diagonal part */
1295   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1296 
1297   /* scale the diagonal part */
1298   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1299 
1300   /* right diagonalscale the off-diagonal part */
1301   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
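/*
   The ll == rr restriction above keeps the scaling symmetric: for
   symmetric M and diagonal D, (D M D)^T = D M^T D = D M D, whereas a
   general L M R scaling would break the symmetry the SBAIJ format assumes.
*/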
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1308 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1309 {
1310   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatEqual_MPISBAIJ"
1322 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1323 {
1324   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1325   Mat            a,b,c,d;
1326   PetscTruth     flg;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   a = matA->A; b = matA->B;
1331   c = matB->A; d = matB->B;
1332 
1333   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1334   if (flg) {
1335     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1336   }
1337   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatCopy_MPISBAIJ"
1343 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1344 {
1345   PetscErrorCode ierr;
1346   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1347   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1348 
1349   PetscFunctionBegin;
1350   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1351   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1352     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1353     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1354     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1355   } else {
1356     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1357     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1364 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1365 {
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #include "petscblaslapack.h"
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1377 {
1378   PetscErrorCode ierr;
1379   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1380   PetscBLASInt   bnz,one=1;
1381   Mat_SeqSBAIJ   *xa,*ya;
1382   Mat_SeqBAIJ    *xb,*yb;
1383 
1384   PetscFunctionBegin;
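  /* with identical nonzero patterns the stored value arrays of X and Y line up entry for entry, so a flat BLAS axpy on each part suffices */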
1385   if (str == SAME_NONZERO_PATTERN) {
1386     PetscScalar alpha = a;
1387     xa = (Mat_SeqSBAIJ *)xx->A->data;
1388     ya = (Mat_SeqSBAIJ *)yy->A->data;
1389     bnz = PetscBLASIntCast(xa->nz);
1390     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1391     xb = (Mat_SeqBAIJ *)xx->B->data;
1392     yb = (Mat_SeqBAIJ *)yy->B->data;
1393     bnz = PetscBLASIntCast(xb->nz);
1394     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1395   } else {
1396     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1397     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1398     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1406 {
1407   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1408   PetscInt        rbs,cbs;
1409   PetscErrorCode  ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1413   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1414   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1416   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but row block size is already %D",bs,rbs);
1417   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but column block size is already %D",bs,cbs);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1424 {
1425   PetscErrorCode ierr;
1426   PetscInt       i;
1427   PetscTruth     flg;
1428 
1429   PetscFunctionBegin;
1430   for (i=0; i<n; i++) {
1431     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1432     if (!flg) {
1433       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1434     }
1435   }
1436   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 
1441 /* -------------------------------------------------------------------*/
1442 static struct _MatOps MatOps_Values = {
1443        MatSetValues_MPISBAIJ,
1444        MatGetRow_MPISBAIJ,
1445        MatRestoreRow_MPISBAIJ,
1446        MatMult_MPISBAIJ,
1447 /* 4*/ MatMultAdd_MPISBAIJ,
1448        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones since the matrix is symmetric */
1449        MatMultAdd_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        MatSOR_MPISBAIJ,
1457        MatTranspose_MPISBAIJ,
1458 /*15*/ MatGetInfo_MPISBAIJ,
1459        MatEqual_MPISBAIJ,
1460        MatGetDiagonal_MPISBAIJ,
1461        MatDiagonalScale_MPISBAIJ,
1462        MatNorm_MPISBAIJ,
1463 /*20*/ MatAssemblyBegin_MPISBAIJ,
1464        MatAssemblyEnd_MPISBAIJ,
1465        MatSetOption_MPISBAIJ,
1466        MatZeroEntries_MPISBAIJ,
1467 /*24*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*34*/ MatDuplicate_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*39*/ MatAXPY_MPISBAIJ,
1483        MatGetSubMatrices_MPISBAIJ,
1484        MatIncreaseOverlap_MPISBAIJ,
1485        MatGetValues_MPISBAIJ,
1486        MatCopy_MPISBAIJ,
1487 /*44*/ 0,
1488        MatScale_MPISBAIJ,
1489        0,
1490        0,
1491        0,
1492 /*49*/ MatSetBlockSize_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*54*/ 0,
1498        0,
1499        MatSetUnfactored_MPISBAIJ,
1500        0,
1501        MatSetValuesBlocked_MPISBAIJ,
1502 /*59*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*64*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*74*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*79*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatLoad_MPISBAIJ,
1527 /*84*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*89*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*94*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*99*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*104*/0,
1548        MatRealPart_MPISBAIJ,
1549        MatImaginaryPart_MPISBAIJ,
1550        MatGetRowUpperTriangular_MPISBAIJ,
1551        MatRestoreRowUpperTriangular_MPISBAIJ
1552 };
1553 
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1558 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1559 {
1560   PetscFunctionBegin;
1561   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1562   *iscopy = PETSC_FALSE;
1563   PetscFunctionReturn(0);
1564 }
1565 EXTERN_C_END
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1570 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1571 {
1572   Mat_MPISBAIJ   *b;
1573   PetscErrorCode ierr;
1574   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1575 
1576   PetscFunctionBegin;
1577   if (bs < 0){
1578     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1579       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1580     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1581     bs   = PetscAbs(bs);
1582   }
1583   if ((d_nnz || o_nnz) && newbs != bs) {
1584     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1585   }
1586   bs = newbs;
1587 
1588   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1589   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1590   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1591   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1592 
1593   B->rmap->bs = B->cmap->bs = bs;
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596 
1597   if (d_nnz) {
1598     for (i=0; i<B->rmap->n/bs; i++) {
1599       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1600     }
1601   }
1602   if (o_nnz) {
1603     for (i=0; i<B->rmap->n/bs; i++) {
1604       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1605     }
1606   }
1607 
1608   b   = (Mat_MPISBAIJ*)B->data;
1609   mbs = B->rmap->n/bs;
1610   Mbs = B->rmap->N/bs;
1611   if (mbs*bs != B->rmap->n) {
1612     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1613   }
1614 
1615   B->rmap->bs  = bs;
1616   b->bs2 = bs*bs;
1617   b->mbs = mbs;
1618   b->nbs = mbs;
1619   b->Mbs = Mbs;
1620   b->Nbs = Mbs;
1621 
1622   for (i=0; i<=b->size; i++) {
1623     b->rangebs[i] = B->rmap->range[i]/bs;
1624   }
1625   b->rstartbs = B->rmap->rstart/bs;
1626   b->rendbs   = B->rmap->rend/bs;
1627 
1628   b->cstartbs = B->cmap->rstart/bs;
1629   b->cendbs   = B->cmap->rend/bs;
1630 
1631   if (!B->preallocated) {
1632     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1633     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1634     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1635     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1638     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1640     ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->bstash);CHKERRQ(ierr);
1641   }
1642 
1643   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1644   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1645   B->preallocated = PETSC_TRUE;
1646   PetscFunctionReturn(0);
1647 }
1648 EXTERN_C_END
1649 
1650 EXTERN_C_BEGIN
1651 #if defined(PETSC_HAVE_MUMPS)
1652 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1653 #endif
1654 #if defined(PETSC_HAVE_SPOOLES)
1655 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1656 #endif
1657 #if defined(PETSC_HAVE_PASTIX)
1658 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1659 #endif
1660 EXTERN_C_END
1661 
1662 /*MC
1663    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1664    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1665 
1666    Options Database Keys:
1667 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1668 
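   Example of creation (a minimal sketch; comm, m, M, bs, d_nz, and o_nz are placeholders
   you must supply for your problem):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
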
1669   Level: beginner
1670 
1671 .seealso: MatCreateMPISBAIJ
1672 M*/
1673 
1674 EXTERN_C_BEGIN
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatCreate_MPISBAIJ"
1677 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1678 {
1679   Mat_MPISBAIJ   *b;
1680   PetscErrorCode ierr;
1681   PetscTruth     flg;
1682 
1683   PetscFunctionBegin;
1684 
1685   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1686   B->data = (void*)b;
1687   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1688 
1689   B->ops->destroy    = MatDestroy_MPISBAIJ;
1690   B->ops->view       = MatView_MPISBAIJ;
1691   B->mapping         = 0;
1692   B->assembled       = PETSC_FALSE;
1693 
1694   B->insertmode = NOT_SET_VALUES;
1695   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1696   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1697 
1698   /* build local table of row and column ownerships */
1699   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1700 
1701   /* build cache for off array entries formed */
1702   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1703   b->donotstash  = PETSC_FALSE;
1704   b->colmap      = PETSC_NULL;
1705   b->garray      = PETSC_NULL;
1706   b->roworiented = PETSC_TRUE;
1707 
1708   /* stuff used in block assembly */
1709   b->barray       = 0;
1710 
1711   /* stuff used for matrix vector multiply */
1712   b->lvec         = 0;
1713   b->Mvctx        = 0;
1714   b->slvec0       = 0;
1715   b->slvec0b      = 0;
1716   b->slvec1       = 0;
1717   b->slvec1a      = 0;
1718   b->slvec1b      = 0;
1719   b->sMvctx       = 0;
1720 
1721   /* stuff for MatGetRow() */
1722   b->rowindices   = 0;
1723   b->rowvalues    = 0;
1724   b->getrowactive = PETSC_FALSE;
1725 
1726   /* hash table stuff */
1727   b->ht           = 0;
1728   b->hd           = 0;
1729   b->ht_size      = 0;
1730   b->ht_flag      = PETSC_FALSE;
1731   b->ht_fact      = 0;
1732   b->ht_total_ct  = 0;
1733   b->ht_insert_ct = 0;
1734 
1735   b->in_loc       = 0;
1736   b->v_loc        = 0;
1737   b->n_loc        = 0;
1738   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1739     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1740     if (flg) {
1741       PetscReal fact = 1.39;
1742       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1743       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1744       if (fact <= 1.0) fact = 1.39;
1745       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1746       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1747     }
1748   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1749 
1750 #if defined(PETSC_HAVE_PASTIX)
1751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1752 					   "MatGetFactor_mpisbaij_pastix",
1753 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1754 #endif
1755 #if defined(PETSC_HAVE_MUMPS)
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1757                                      "MatGetFactor_mpisbaij_mumps",
1758                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1759 #endif
1760 #if defined(PETSC_HAVE_SPOOLES)
1761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1762                                      "MatGetFactor_mpisbaij_spooles",
1763                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1764 #endif
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1766                                      "MatStoreValues_MPISBAIJ",
1767                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1769                                      "MatRetrieveValues_MPISBAIJ",
1770                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1772                                      "MatGetDiagonalBlock_MPISBAIJ",
1773                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1775                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1776                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1777   B->symmetric                  = PETSC_TRUE;
1778   B->structurally_symmetric     = PETSC_TRUE;
1779   B->symmetric_set              = PETSC_TRUE;
1780   B->structurally_symmetric_set = PETSC_TRUE;
1781   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1782   PetscFunctionReturn(0);
1783 }
1784 EXTERN_C_END
1785 
1786 /*MC
1787    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1788 
1789    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1790    and MATMPISBAIJ otherwise.
1791 
1792    Options Database Keys:
1793 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1794 
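   Example (a sketch; the actual type is selected at runtime, e.g. with -mat_type sbaij):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
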
1795   Level: beginner
1796 
1797 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1798 M*/
1799 
1800 EXTERN_C_BEGIN
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatCreate_SBAIJ"
1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1804 {
1805   PetscErrorCode ierr;
1806   PetscMPIInt    size;
1807 
1808   PetscFunctionBegin;
1809   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1810   if (size == 1) {
1811     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1812   } else {
1813     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1814   }
1815   PetscFunctionReturn(0);
1816 }
1817 EXTERN_C_END
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1821 /*@C
1822    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1823    the user should preallocate the matrix storage by setting the parameters
1824    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1825    performance can be increased by more than a factor of 50.
1826 
1827    Collective on Mat
1828 
1829    Input Parameters:
1830 +  A - the matrix
1831 .  bs   - size of block
1832 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1833            submatrix  (same for all local rows)
1834 .  d_nnz - array containing the number of block nonzeros in the various block rows
1835            in the upper triangular and diagonal part of the diagonal portion of the local
1836            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1837            room for the diagonal entry even if it is zero.
1838 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1839            submatrix (same for all local rows).
1840 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1841            off-diagonal portion of the local submatrix (possibly different for
1842            each block row) or PETSC_NULL.
1843 
1844 
1845    Options Database Keys:
1846 +   -mat_no_unroll - uses code that does not unroll the loops in the
1847                      block calculations (much slower)
1848 -   -mat_block_size - size of the blocks to use
1849 
1850    Notes:
1851 
1852    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1853    then it must be used on all processors that share the object for that argument.
1854 
1855    If the *_nnz parameter is given then the *_nz parameter is ignored
1856    If the *_nnz parameter is given then the *_nz parameter is ignored.
1857    Storage Information:
1858    For a square global matrix we define each processor's diagonal portion
1859    to be its local rows and the corresponding columns (a square submatrix);
1860    each processor's off-diagonal portion encompasses the remainder of the
1861    local matrix (a rectangular submatrix).
1862 
1863    The user can specify preallocated storage for the diagonal part of
1864    the local submatrix with either d_nz or d_nnz (not both).  Set
1865    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1866    memory allocation.  Likewise, specify preallocated storage for the
1867    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1868 
1869    You can call MatGetInfo() to get information on how effective the preallocation was;
1870    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1871    You can also run with the option -info and look for messages with the string
1872    malloc in them to see if additional memory allocation was needed.
1873 
1874    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1875    the figure below we depict these three local rows and all columns (0-11).
1876 
1877 .vb
1878            0 1 2 3 4 5 6 7 8 9 10 11
1879           -------------------
1880    row 3  |  o o o d d d o o o o o o
1881    row 4  |  o o o d d d o o o o o o
1882    row 5  |  o o o d d d o o o o o o
1883           -------------------
1884 .ve
1885 
1886    Thus, any entries in the d locations are stored in the d (diagonal)
1887    submatrix, and any entries in the o locations are stored in the
1888    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1889    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1890 
1891    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1892    plus the diagonal part of the d matrix,
1893    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1894    In general, for PDE problems in which most nonzeros are near the diagonal,
1895    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1896    or you will get TERRIBLE performance; see the users' manual chapter on
1897    matrices.
1898 
1899    Level: intermediate
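   As a sketch for the figure above with bs=1: only the upper triangle is stored, so a tight
   preallocation is d_nnz = {3,2,1} (row 3 keeps columns 3,4,5; row 4 keeps 4,5; row 5 keeps 5)
   and o_nnz = {6,6,6} (columns 6-11); columns 0-2 lie in the lower triangle and are not stored
   on this processor:
.vb
   PetscInt d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
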
1900 
1901 .keywords: matrix, block, aij, compressed row, sparse, parallel
1902 
1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1904 @*/
1905 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1906 {
1907   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1908 
1909   PetscFunctionBegin;
1910   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1911   if (f) {
1912     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatCreateMPISBAIJ"
1919 /*@C
1920    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1921    (block compressed row).  For good matrix assembly performance
1922    the user should preallocate the matrix storage by setting the parameters
1923    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1924    performance can be increased by more than a factor of 50.
1925 
1926    Collective on MPI_Comm
1927 
1928    Input Parameters:
1929 +  comm - MPI communicator
1930 .  bs   - size of block
1931 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1932            This value should be the same as the local size used in creating the
1933            y vector for the matrix-vector product y = Ax.
1934 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1935            This value should be the same as the local size used in creating the
1936            x vector for the matrix-vector product y = Ax.
1937 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1938 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1939 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1940            submatrix  (same for all local rows)
1941 .  d_nnz - array containing the number of block nonzeros in the various block rows
1942            in the upper triangular portion of the diagonal portion of the local
1943            submatrix (possibly different for each block row) or PETSC_NULL.
1944            You must leave room for the diagonal entry even if it is zero.
1945 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1946            submatrix (same for all local rows).
1947 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1948            off-diagonal portion of the local submatrix (possibly different for
1949            each block row) or PETSC_NULL.
1950 
1951    Output Parameter:
1952 .  A - the matrix
1953 
1954    Options Database Keys:
1955 +   -mat_no_unroll - uses code that does not unroll the loops in the
1956                      block calculations (much slower)
1957 .   -mat_block_size - size of the blocks to use
1958 -   -mat_mpi - use the parallel matrix data structures even on one processor
1959                (defaults to using SeqSBAIJ format on one processor)
1960 
1961    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1962    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1963    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
1964 
1965    Notes:
1966    The number of rows and columns must be divisible by blocksize.
1967    This matrix type does not support complex Hermitian operation.
1968    This matrix type does not support complex Hermitian operations.
1969    The user MUST specify either the local or global matrix dimensions
1970    (possibly both).
1971 
1972    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1973    then it must be used on all processors that share the object for that argument.
1974 
1975    If the *_nnz parameter is given then the *_nz parameter is ignored
1976    If the *_nnz parameter is given then the *_nz parameter is ignored.
1977    Storage Information:
1978    For a square global matrix we define each processor's diagonal portion
1979    to be its local rows and the corresponding columns (a square submatrix);
1980    each processor's off-diagonal portion encompasses the remainder of the
1981    local matrix (a rectangular submatrix).
1982 
1983    The user can specify preallocated storage for the diagonal part of
1984    the local submatrix with either d_nz or d_nnz (not both).  Set
1985    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1986    memory allocation.  Likewise, specify preallocated storage for the
1987    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1988 
1989    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1990    the figure below we depict these three local rows and all columns (0-11).
1991 
1992 .vb
1993            0 1 2 3 4 5 6 7 8 9 10 11
1994           -------------------
1995    row 3  |  o o o d d d o o o o o o
1996    row 4  |  o o o d d d o o o o o o
1997    row 5  |  o o o d d d o o o o o o
1998           -------------------
1999 .ve
2000 
2001    Thus, any entries in the d locations are stored in the d (diagonal)
2002    submatrix, and any entries in the o locations are stored in the
2003    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2004    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2005 
2006    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2007    plus the diagonal part of the d matrix,
2008    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2009    In general, for PDE problems in which most nonzeros are near the diagonal,
2010    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2011    or you will get TERRIBLE performance; see the users' manual chapter on
2012    matrices.
2013 
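   Example call (a sketch; bs, M, d_nz, and o_nz are placeholders chosen for your problem):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
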
2014    Level: intermediate
2015 
2016 .keywords: matrix, block, aij, compressed row, sparse, parallel
2017 
2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2019 @*/
2020 
2021 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2022 {
2023   PetscErrorCode ierr;
2024   PetscMPIInt    size;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2028   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2029   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2030   if (size > 1) {
2031     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2032     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2033   } else {
2034     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2035     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2043 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2044 {
2045   Mat            mat;
2046   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2047   PetscErrorCode ierr;
2048   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2049   PetscScalar    *array;
2050 
2051   PetscFunctionBegin;
2052   *newmat       = 0;
2053   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2054   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2055   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2056   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2057   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2058   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2059 
2060   mat->factor       = matin->factor;
2061   mat->preallocated = PETSC_TRUE;
2062   mat->assembled    = PETSC_TRUE;
2063   mat->insertmode   = NOT_SET_VALUES;
2064 
2065   a = (Mat_MPISBAIJ*)mat->data;
2066   a->bs2   = oldmat->bs2;
2067   a->mbs   = oldmat->mbs;
2068   a->nbs   = oldmat->nbs;
2069   a->Mbs   = oldmat->Mbs;
2070   a->Nbs   = oldmat->Nbs;
2071 
2072 
2073   a->size         = oldmat->size;
2074   a->rank         = oldmat->rank;
2075   a->donotstash   = oldmat->donotstash;
2076   a->roworiented  = oldmat->roworiented;
2077   a->rowindices   = 0;
2078   a->rowvalues    = 0;
2079   a->getrowactive = PETSC_FALSE;
2080   a->barray       = 0;
2081   a->rstartbs    = oldmat->rstartbs;
2082   a->rendbs      = oldmat->rendbs;
2083   a->cstartbs    = oldmat->cstartbs;
2084   a->cendbs      = oldmat->cendbs;
2085 
2086   /* hash table stuff */
2087   a->ht           = 0;
2088   a->hd           = 0;
2089   a->ht_size      = 0;
2090   a->ht_flag      = oldmat->ht_flag;
2091   a->ht_fact      = oldmat->ht_fact;
2092   a->ht_total_ct  = 0;
2093   a->ht_insert_ct = 0;
2094 
2095   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2096   if (oldmat->colmap) {
2097 #if defined (PETSC_USE_CTABLE)
2098     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2099 #else
2100     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2101     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2102     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2103 #endif
2104   } else a->colmap = 0;
2105 
2106   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2107     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2108     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2109     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2110   } else a->garray = 0;
2111 
2112   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2113   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2114   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2115   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2116   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2117 
2118   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2120   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2122 
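  /* slvec1 is one contiguous array: its first bs*mbs entries (slvec1a) hold the local part and the
     rest (slvec1b) the ghost part, so the sub-vectors created below share its storage */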
2123   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2124   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2125   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2127   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2128   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2130   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2136 
2137   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet; instead take a reference to the old scatter (the lazy trick): */
2138   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2139   a->sMvctx = oldmat->sMvctx;
2140   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2141 
2142   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2144   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2146   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2147   *newmat = mat;
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #include "petscsys.h"
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatLoad_MPISBAIJ"
2155 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2156 {
2157   Mat            A;
2158   PetscErrorCode ierr;
2159   PetscInt       i,nz,j,rstart,rend;
2160   PetscScalar    *vals,*buf;
2161   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2162   MPI_Status     status;
2163   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2164   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2165   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2166   PetscInt       bs=1,Mbs,mbs,extra_rows;
2167   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2168   PetscInt       dcount,kmax,k,nzcount,tmp;
2169   int            fd;
2170 
2171   PetscFunctionBegin;
2172   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2173     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2174   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2175 
2176   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2177   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2178   if (!rank) {
2179     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2180     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2181     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2182     if (header[3] < 0) {
2183       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2184     }
2185   }
2186 
2187   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2188   M = header[1]; N = header[2];
2189 
2190   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2191 
2192   /*
2193      This code adds extra rows to make sure the number of rows is
2194      divisible by the blocksize
2195   */
2196   Mbs        = M/bs;
2197   extra_rows = bs - M + bs*(Mbs);
2198   if (extra_rows == bs) extra_rows = 0;
2199   else                  Mbs++;
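  /* e.g. with M=10 and bs=4: Mbs=2, extra_rows = 4-10+8 = 2, Mbs becomes 3, and the matrix is padded to 12 rows */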
2200   if (extra_rows && !rank) {
2201     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2202   }
2203 
2204   /* determine ownership of all rows */
2205   mbs        = Mbs/size + ((Mbs % size) > rank);
2206   m          = mbs*bs;
2207   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2208   mmbs       = PetscMPIIntCast(mbs);
2209   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2210   rowners[0] = 0;
2211   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2212   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2213   rstart = rowners[rank];
2214   rend   = rowners[rank+1];
2215 
2216   /* distribute row lengths to all processors */
2217   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2218   if (!rank) {
2219     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2220     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2221     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2222     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2223     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2224     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2225     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2226   } else {
2227     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2228   }
2229 
2230   if (!rank) {   /* procs[0] */
2231     /* calculate the number of nonzeros on each processor */
2232     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2233     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2234     for (i=0; i<size; i++) {
2235       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2236         procsnz[i] += rowlengths[j];
2237       }
2238     }
2239     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2240 
2241     /* determine max buffer needed and allocate it */
2242     maxnz = 0;
2243     for (i=0; i<size; i++) {
2244       maxnz = PetscMax(maxnz,procsnz[i]);
2245     }
2246     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2247 
2248     /* read in my part of the matrix column indices  */
2249     nz     = procsnz[0];
2250     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2251     mycols = ibuf;
2252     if (size == 1)  nz -= extra_rows;
2253     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2254     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2255 
2256     /* read in everyone else's column indices (except the last proc's) and ship them off */
2257     for (i=1; i<size-1; i++) {
2258       nz   = procsnz[i];
2259       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2260       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2261     }
2262     /* read in the stuff for the last proc */
2263     if (size != 1) {
2264       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2265       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2266       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2267       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2268     }
2269     ierr = PetscFree(cols);CHKERRQ(ierr);
2270   } else {  /* procs[i], i>0 */
2271     /* determine buffer space needed for message */
2272     nz = 0;
2273     for (i=0; i<m; i++) {
2274       nz += locrowlens[i];
2275     }
2276     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2277     mycols = ibuf;
2278     /* receive message of column indices*/
2279     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2280     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2281     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2282   }
2283 
2284   /* loop over local rows, determining number of off diagonal entries */
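  /* mask[] flags the block columns already counted in the current block row; masked1[] and masked2[]
     record which flags to clear afterwards for the diagonal and off-diagonal portions respectively */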
2285   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2286   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2287   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2288   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2289   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2290   rowcount = 0;
2291   nzcount  = 0;
2292   for (i=0; i<mbs; i++) {
2293     dcount  = 0;
2294     odcount = 0;
2295     for (j=0; j<bs; j++) {
2296       kmax = locrowlens[rowcount];
2297       for (k=0; k<kmax; k++) {
2298         tmp = mycols[nzcount++]/bs; /* block col. index */
2299         if (!mask[tmp]) {
2300           mask[tmp] = 1;
2301           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2302           else masked1[dcount++] = tmp; /* entry in diag portion */
2303         }
2304       }
2305       rowcount++;
2306     }
2307 
2308     dlens[i]  = dcount;  /* d_nzz[i] */
2309     odlens[i] = odcount; /* o_nzz[i] */
2310 
2311     /* zero out the mask elements we set */
2312     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2313     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2314   }
2315 
2316   /* create our matrix */
2317   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2318   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2319   ierr = MatSetType(A,type);CHKERRQ(ierr);
2320   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2321   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2322 
2323   if (!rank) {
2324     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2325     /* read in my part of the matrix numerical values  */
2326     nz = procsnz[0];
2327     vals = buf;
2328     mycols = ibuf;
2329     if (size == 1)  nz -= extra_rows;
2330     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2331     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2332 
2333     /* insert into matrix */
2334     jj      = rstart*bs;
2335     for (i=0; i<m; i++) {
2336       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2337       mycols += locrowlens[i];
2338       vals   += locrowlens[i];
2339       jj++;
2340     }
2341 
2342     /* read in the other processors' values (except the last one's) and ship them out */
2343     for (i=1; i<size-1; i++) {
2344       nz   = procsnz[i];
2345       vals = buf;
2346       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2347       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2348     }
2349     /* the last proc */
2350     if (size != 1){
2351       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2352       vals = buf;
2353       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2354       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2355       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2356     }
2357     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2358 
2359   } else {
2360     /* receive numeric values */
2361     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2362 
2363     /* receive message of values*/
2364     vals   = buf;
2365     mycols = ibuf;
2366     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2367     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2368     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2369 
2370     /* insert into matrix */
2371     jj      = rstart*bs;
2372     for (i=0; i<m; i++) {
2373       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2374       mycols += locrowlens[i];
2375       vals   += locrowlens[i];
2376       jj++;
2377     }
2378   }
2379 
2380   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2381   ierr = PetscFree(buf);CHKERRQ(ierr);
2382   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2383   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2384   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2385   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2386   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   *newmat = A;
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 #undef __FUNCT__
2393 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2394 /*XXXXX@
2395    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2396 
2397    Input Parameters:
2398 .  mat  - the matrix
2399 .  fact - factor
2400 
2401    Collective on Mat
2402 
2403    Level: advanced
2404 
2405   Notes:
2406    This can also be set by the command line option: -mat_use_hash_table fact
2407 
2408 .keywords: matrix, hashtable, factor, HT
2409 
2410 .seealso: MatSetOption()
2411 @XXXXX*/
2412 
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2416 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2417 {
2418   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2419   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2420   PetscReal      atmp;
2421   PetscReal      *work,*svalues,*rvalues;
2422   PetscErrorCode ierr;
2423   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2424   PetscMPIInt    rank,size;
2425   PetscInt       *rowners_bs,dest,count,source;
2426   PetscScalar    *va;
2427   MatScalar      *ba;
2428   MPI_Status     stat;
2429 
2430   PetscFunctionBegin;
2431   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2432   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2433   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2434 
2435   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2436   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2437 
2438   bs   = A->rmap->bs;
2439   mbs  = a->mbs;
2440   Mbs  = a->Mbs;
2441   ba   = b->a;
2442   bi   = b->i;
2443   bj   = b->j;
2444 
2445   /* find ownerships */
2446   rowners_bs = A->rmap->range;
2447 
2448   /* each proc creates an array to be distributed */
2449   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2450   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2451 
2452   /* row_max for B */
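  /* only the upper triangle is stored, so each stored block of B contributes to its own row maximum
     (accumulated into va[] below) and, by symmetry, to the row owned by the process holding its column
     (accumulated into work[] and sent to the higher-ranked owners) */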
2453   if (rank != size-1){
2454     for (i=0; i<mbs; i++) {
2455       ncols = bi[1] - bi[0]; bi++;
2456       brow  = bs*i;
2457       for (j=0; j<ncols; j++){
2458         bcol = bs*(*bj);
2459         for (kcol=0; kcol<bs; kcol++){
2460           col = bcol + kcol;                 /* local col index */
2461           col += rowners_bs[rank+1];      /* global col index */
2462           for (krow=0; krow<bs; krow++){
2463             atmp = PetscAbsScalar(*ba); ba++;
2464             row = brow + krow;    /* local row index */
2465             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2466             if (work[col] < atmp) work[col] = atmp;
2467           }
2468         }
2469         bj++;
2470       }
2471     }
2472 
2473     /* send values to its owners */
2474     for (dest=rank+1; dest<size; dest++){
2475       svalues = work + rowners_bs[dest];
2476       count   = rowners_bs[dest+1]-rowners_bs[dest];
2477       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2478     }
2479   }
2480 
2481   /* receive values */
2482   if (rank){
2483     rvalues = work;
2484     count   = rowners_bs[rank+1]-rowners_bs[rank];
2485     for (source=0; source<rank; source++){
2486       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2487       /* process values */
2488       for (i=0; i<count; i++){
2489         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2490       }
2491     }
2492   }
2493 
2494   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2495   ierr = PetscFree(work);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatSOR_MPISBAIJ"
2501 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2502 {
2503   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2504   PetscErrorCode ierr;
2505   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2506   PetscScalar    *x,*b,*ptr,*from;
2507   Vec            bb1;
2508 
2509   PetscFunctionBegin;
2510   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2511   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2512 
2513   if (flag == SOR_APPLY_UPPER) {
2514     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2515     PetscFunctionReturn(0);
2516   }
2517 
2518   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2519     if (flag & SOR_ZERO_INITIAL_GUESS) {
2520       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2521       its--;
2522     }
2523 
2524     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
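    /* each sweep assembles bb1 = bb - (lower triangle)*xx - (upper triangle)*xx using only the stored
       upper-triangular B: B^T supplies the lower part through the slvec0/slvec1 scatter, and B applied
       to the ghost values of xx supplies the upper part */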
2525     while (its--){
2526 
2527       /* lower triangular part: slvec0b = - B^T*xx */
2528       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2529 
2530       /* copy xx into slvec0a */
2531       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2532       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2533       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2534       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2535 
2536       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2537 
2538       /* copy bb into slvec1a */
2539       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2540       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2541       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2542       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2543 
2544       /* set slvec1b = 0 */
2545       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2546 
2547       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2548       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2550       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2551 
2552       /* upper triangular part: bb1 = bb1 - B*x */
2553       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2554 
2555       /* local diagonal sweep */
2556       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2557     }
2558     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2559   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2560     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2561   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2563   } else if (flag & SOR_EISENSTAT) {
2564     Vec               xx1;
2565     PetscTruth        hasop;
2566     const PetscScalar *diag;
2567     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2568     PetscInt          i,n;
2569 
2570     if (!mat->xx1) {
2571       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2572       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2573     }
2574     xx1 = mat->xx1;
2575     bb1 = mat->bb1;
2576 
2577     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2578 
2579     if (!mat->diag) {
2580       /* note: the diagonal is cached here, which is wrong if the same matrix is later given new nonzero values */
2581       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2582       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2583     }
2584     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2585 
2586     if (hasop) {
2587       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* the result must land in slvec1a, which the VecAYPX() below scales */
2588       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2589     } else {
2590       /*
2591           These two lines are replaced by code that may be a bit faster for a good compiler
2592       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2593       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2594       */
2595       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2596       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2597       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2598       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2599       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2600       if (omega == 1.0) {
2601 	for (i=0; i<n; i++) {
2602 	  sl[i] = b[i] - diag[i]*x[i];
2603 	}
2604         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2605       } else {
2606 	for (i=0; i<n; i++) {
2607 	  sl[i] = b[i] + scale*diag[i]*x[i];
2608 	}
2609         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2610       }
2611       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2612       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2613       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2614       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2615     }
2616 
2617     /* multiply off-diagonal portion of matrix */
2618     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2619     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2620     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2621     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2622     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2623     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2624     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2625     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2626     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2627     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2628 
2629     /* local sweep */
2630     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2631     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2632   } else {
2633     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2634   }
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 #undef __FUNCT__
2639 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2640 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2641 {
2642   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2643   PetscErrorCode ierr;
2644   Vec            lvec1,bb1;
2645 
2646   PetscFunctionBegin;
2647   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2648   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2649 
2650   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2651     if (flag & SOR_ZERO_INITIAL_GUESS) {
2652       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2653       its--;
2654     }
2655 
2656     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2657     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2658     while (its--){
2659       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2660 
2661       /* lower triangular part: bb1 = bb - B^T*xx */
2662       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2663       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2664 
2665       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2666       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2667       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2668 
2669       /* upper triangular part: bb1 = bb1 - B*x */
2670       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2671       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2672 
2673       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2674 
2675       /* diagonal sweep */
2676       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2677     }
2678     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2679     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2680   } else {
2681     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2682   }
2683   PetscFunctionReturn(0);
2684 }
2685 
2686