/* /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 753ebd1d069d7a4e86ed47ccb83df941bd1fb5db) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
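
/*
   Usage sketch (illustrative only, not part of this file): the store/retrieve
   pair snapshots and restores the numerical values of an assembled matrix
   whose nonzero pattern has been frozen, e.g. inside a nonlinear solve where
   the structure never changes. 'mat' here is an assumed assembled MATMPISBAIJ.

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      snapshot values of A and B parts
     ...   modify entries with MatSetValues() and reassemble   ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   restore the snapshot
*/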

#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
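
/*
   Layout note (explanatory, added for clarity): within a block row, the
   bs x bs blocks are stored contiguously, each in column-major order, so the
   point entry at local position (ridx,cidx) of the _i-th stored block of the
   row lives at

       ap[bs2*_i + bs*cidx + ridx]

   Worked example with bs = 2 (bs2 = 4): the point entry (row,col) = (5,8)
   maps to block row brow = 5/2 = 2, block column bcol = 8/2 = 4, ridx = 5%2
   = 1, cidx = 8%2 = 0; if that block is the third stored block of its row
   (_i = 2) the value lands at ap[4*2 + 2*0 + 1] = ap[9].
*/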

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore such values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
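
/*
   Usage sketch (illustrative only): because only the upper triangle is
   stored, a caller either supplies entries with i <= j only, or asks the
   matrix to silently drop lower-triangular input. 'mat' is an assumed
   preallocated MATMPISBAIJ.

     PetscInt    i = 3, j = 7;
     PetscScalar val = 2.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);   kept (i<=j)
     ierr = MatSetValues(mat,1,&j,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr);   dropped
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/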

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
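
/*
   Usage sketch (illustrative only): inserting one bs x bs block at global
   block indices (ib,jb) with ib <= jb; values are laid out row-major since
   MAT_ROW_ORIENTED is the default. 'mat' is an assumed MATMPISBAIJ with
   block size 2.

     PetscInt    ib = 1, jb = 2;                   block row/column indices
     PetscScalar blk[4] = {4.0, 1.0, 1.0, 4.0};    bs = 2, so bs2 = 4 values
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/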

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]); /* off-diagonal part counted twice by symmetry */
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap->N;
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
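
/*
   Note on the Frobenius reduction above (explanatory): each process computes
   the squared Frobenius norms of its diagonal part (sum[0] after the
   Allreduce) and off-diagonal part (sum[1]); since each off-diagonal block
   represents both an (i,j) entry and its mirror (j,i), the global norm is

       ||A||_F = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 )

   which is exactly  *norm = sqrt(sum[0] + 2*sum[1]).
*/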

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags before MatSetValues() and restore the
       original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
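
/*
   Usage sketch (illustrative only): the begin/end pair flushes stashed
   off-process entries (both point and block stashes), assembles the diagonal
   (A) and off-diagonal (B) parts, and on first final assembly sets up the
   scatter contexts used by MatMult.

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/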

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
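
/*
   Explanatory note on the symmetric multiply above: with the local split
   A_local = [Ad  Bo] and only the upper triangle stored, y = A*x is built as

       slvec1a = Ad*x_local                       (local symmetric part)
       slvec0  = [x_local | Bo^T*x_local]         (packed send buffer)
       scatter slvec0 -> slvec1 with ADD_VALUES   (ghost values of x arrive in
                                                   slvec1b; remote Bo^T*x
                                                   contributions are added
                                                   into slvec1a)
       y = Bo*slvec1b + slvec1a

   so the lower-triangular action Bo^T and the ghost gather share a single
   VecScatter (sMvctx), i.e. one round of communication per multiply.
*/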

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
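
/*
   Usage sketch (illustrative only): for SBAIJ matrices the row-access pair
   must be bracketed by the upper-triangular accessors, and only locally
   owned rows may be queried; 'mat' and 'row' are assumptions of the sketch.

     PetscInt    nz, *cols;
     PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ...  use the nz entries of cols/vals (upper triangle of this row)  ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/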

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is a copy of itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg)
      SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
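
/*
   Usage sketch (illustrative only): symmetric scaling D*A*D; for the SBAIJ
   format the left and right scaling vectors must be the same (or equal)
   vector so that the scaled matrix stays symmetric. 'd' is an assumed Vec
   with the same layout as the matrix rows.

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/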

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1374 
1375 #include "petscblaslapack.h"
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
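/*
   Sketch of the SAME_NONZERO_PATTERN fast path above (illustrative): with
   identical nonzero structure, Y = a*X + Y reduces to two dense BLAS axpy
   calls over the stored values of the diagonal (SBAIJ) and off-diagonal
   (BAIJ) pieces.

.vb
      MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
.ve
*/
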
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1407 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscInt        rbs,cbs;
1411   PetscErrorCode  ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1415   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1418   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but SBAIJ row block size is %D",bs,rbs);
1419   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but SBAIJ column block size is %D",bs,cbs);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1425 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1426 {
1427   PetscErrorCode ierr;
1428   PetscInt       i;
1429   PetscTruth     flg;
1430 
1431   PetscFunctionBegin;
1432   for (i=0; i<n; i++) {
1433     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1434     if (!flg) {
1435       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1436     }
1437   }
1438   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1439   PetscFunctionReturn(0);
1440 }
1441 
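/*
   Usage sketch (illustrative): because only the upper triangle is stored,
   each requested submatrix must be symmetric, i.e. irow[i] and icol[i]
   must be equal index sets.

.vb
      IS  is;
      Mat *submat;
      ISCreateStride(PETSC_COMM_SELF,nloc,first,1,&is);
      MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);
.ve

   Here nloc and first are hypothetical values for the local size and
   starting index.
*/
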
1442 
1443 /* -------------------------------------------------------------------*/
1444 static struct _MatOps MatOps_Values = {
1445        MatSetValues_MPISBAIJ,
1446        MatGetRow_MPISBAIJ,
1447        MatRestoreRow_MPISBAIJ,
1448        MatMult_MPISBAIJ,
1449 /* 4*/ MatMultAdd_MPISBAIJ,
1450        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones */
1451        MatMultAdd_MPISBAIJ,
1452        0,
1453        0,
1454        0,
1455 /*10*/ 0,
1456        0,
1457        0,
1458        MatSOR_MPISBAIJ,
1459        MatTranspose_MPISBAIJ,
1460 /*15*/ MatGetInfo_MPISBAIJ,
1461        MatEqual_MPISBAIJ,
1462        MatGetDiagonal_MPISBAIJ,
1463        MatDiagonalScale_MPISBAIJ,
1464        MatNorm_MPISBAIJ,
1465 /*20*/ MatAssemblyBegin_MPISBAIJ,
1466        MatAssemblyEnd_MPISBAIJ,
1467        MatSetOption_MPISBAIJ,
1468        MatZeroEntries_MPISBAIJ,
1469 /*24*/ 0,
1470        0,
1471        0,
1472        0,
1473        0,
1474 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*34*/ MatDuplicate_MPISBAIJ,
1480        0,
1481        0,
1482        0,
1483        0,
1484 /*39*/ MatAXPY_MPISBAIJ,
1485        MatGetSubMatrices_MPISBAIJ,
1486        MatIncreaseOverlap_MPISBAIJ,
1487        MatGetValues_MPISBAIJ,
1488        MatCopy_MPISBAIJ,
1489 /*44*/ 0,
1490        MatScale_MPISBAIJ,
1491        0,
1492        0,
1493        0,
1494 /*49*/ MatSetBlockSize_MPISBAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*54*/ 0,
1500        0,
1501        MatSetUnfactored_MPISBAIJ,
1502        0,
1503        MatSetValuesBlocked_MPISBAIJ,
1504 /*59*/ 0,
1505        0,
1506        0,
1507        0,
1508        0,
1509 /*64*/ 0,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*74*/ 0,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*79*/ 0,
1525        0,
1526        0,
1527        0,
1528        MatLoad_MPISBAIJ,
1529 /*84*/ 0,
1530        0,
1531        0,
1532        0,
1533        0,
1534 /*89*/ 0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*94*/ 0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*99*/ 0,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*104*/0,
1550        MatRealPart_MPISBAIJ,
1551        MatImaginaryPart_MPISBAIJ,
1552        MatGetRowUpperTriangular_MPISBAIJ,
1553        MatRestoreRowUpperTriangular_MPISBAIJ
1554 };
1555 
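/*
   The table above is the virtual function table consulted by the generic
   Mat interface: MatMult(A,x,y), for example, reaches entry 3 and so calls
   MatMult_MPISBAIJ. A minimal sketch of the dispatch, omitting the
   argument checking done by the interface layer:

.vb
      ierr = (*A->ops->mult)(A,x,y);CHKERRQ(ierr);
.ve
*/
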
1556 
1557 EXTERN_C_BEGIN
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1560 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1561 {
1562   PetscFunctionBegin;
1563   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1564   *iscopy = PETSC_FALSE;
1565   PetscFunctionReturn(0);
1566 }
1567 EXTERN_C_END
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1572 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1573 {
1574   Mat_MPISBAIJ   *b;
1575   PetscErrorCode ierr;
1576   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1577 
1578   PetscFunctionBegin;
1579   if (bs < 0){
1580     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1581       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1582     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1583     bs   = PetscAbs(bs);
1584   }
1585   if ((d_nnz || o_nnz) && newbs != bs) {
1586     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1587   }
1588   bs = newbs;
1589 
1590   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1591   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1592   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1593   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1594 
1595   B->rmap->bs = B->cmap->bs = bs;
1596   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1597   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1598 
1599   if (d_nnz) {
1600     for (i=0; i<B->rmap->n/bs; i++) {
1601       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1602     }
1603   }
1604   if (o_nnz) {
1605     for (i=0; i<B->rmap->n/bs; i++) {
1606       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1607     }
1608   }
1609 
1610   b   = (Mat_MPISBAIJ*)B->data;
1611   mbs = B->rmap->n/bs;
1612   Mbs = B->rmap->N/bs;
1613   if (mbs*bs != B->rmap->n) {
1614     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1615   }
1616 
1617   B->rmap->bs  = bs;
1618   b->bs2 = bs*bs;
1619   b->mbs = mbs;
1620   b->nbs = mbs;
1621   b->Mbs = Mbs;
1622   b->Nbs = Mbs;
1623 
1624   for (i=0; i<=b->size; i++) {
1625     b->rangebs[i] = B->rmap->range[i]/bs;
1626   }
1627   b->rstartbs = B->rmap->rstart/bs;
1628   b->rendbs   = B->rmap->rend/bs;
1629 
1630   b->cstartbs = B->cmap->rstart/bs;
1631   b->cendbs   = B->cmap->rend/bs;
1632 
1633   if (!B->preallocated) {
1634     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1635     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1636     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1637     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1638     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1639     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1640     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1641     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1642     /* build cache for off array entries formed */
1643     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1644   }
1645 
1646   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1647   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1648   B->preallocated = PETSC_TRUE;
1649   PetscFunctionReturn(0);
1650 }
1651 EXTERN_C_END
1652 
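/*
   Sketch of what the routine above configures (illustrative): the local
   rows are split into a square diagonal block stored as MATSEQSBAIJ (upper
   triangle only) and a rectangular off-diagonal block stored as MATSEQBAIJ.
   For instance,

.vb
      MatMPISBAIJSetPreallocation(B,2,3,PETSC_NULL,1,PETSC_NULL);
.ve

   requests block size 2 with 3 block nonzeros per block row preallocated
   in the diagonal part and 1 in the off-diagonal part.
*/
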
1653 EXTERN_C_BEGIN
1654 #if defined(PETSC_HAVE_MUMPS)
1655 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1656 #endif
1657 #if defined(PETSC_HAVE_SPOOLES)
1658 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1659 #endif
1660 #if defined(PETSC_HAVE_PASTIX)
1661 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1662 #endif
1663 EXTERN_C_END
1664 
1665 /*MC
1666    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1667    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1668 
1669    Options Database Keys:
1670 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1671 
1672   Level: beginner
1673 
1674 .seealso: MatCreateMPISBAIJ
1675 M*/
1676 
1677 EXTERN_C_BEGIN
1678 #undef __FUNCT__
1679 #define __FUNCT__ "MatCreate_MPISBAIJ"
1680 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1681 {
1682   Mat_MPISBAIJ   *b;
1683   PetscErrorCode ierr;
1684   PetscTruth     flg;
1685 
1686   PetscFunctionBegin;
1687 
1688   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1689   B->data = (void*)b;
1690   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1691 
1692   B->ops->destroy    = MatDestroy_MPISBAIJ;
1693   B->ops->view       = MatView_MPISBAIJ;
1694   B->mapping         = 0;
1695   B->assembled       = PETSC_FALSE;
1696 
1697   B->insertmode = NOT_SET_VALUES;
1698   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1699   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1700 
1701   /* build local table of row and column ownerships */
1702   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1703 
1704   /* build cache for off array entries formed */
1705   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1706   b->donotstash  = PETSC_FALSE;
1707   b->colmap      = PETSC_NULL;
1708   b->garray      = PETSC_NULL;
1709   b->roworiented = PETSC_TRUE;
1710 
1711   /* stuff used in block assembly */
1712   b->barray       = 0;
1713 
1714   /* stuff used for matrix vector multiply */
1715   b->lvec         = 0;
1716   b->Mvctx        = 0;
1717   b->slvec0       = 0;
1718   b->slvec0b      = 0;
1719   b->slvec1       = 0;
1720   b->slvec1a      = 0;
1721   b->slvec1b      = 0;
1722   b->sMvctx       = 0;
1723 
1724   /* stuff for MatGetRow() */
1725   b->rowindices   = 0;
1726   b->rowvalues    = 0;
1727   b->getrowactive = PETSC_FALSE;
1728 
1729   /* hash table stuff */
1730   b->ht           = 0;
1731   b->hd           = 0;
1732   b->ht_size      = 0;
1733   b->ht_flag      = PETSC_FALSE;
1734   b->ht_fact      = 0;
1735   b->ht_total_ct  = 0;
1736   b->ht_insert_ct = 0;
1737 
1738   b->in_loc       = 0;
1739   b->v_loc        = 0;
1740   b->n_loc        = 0;
1741   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1742     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1743     if (flg) {
1744       PetscReal fact = 1.39;
1745       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1746       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1747       if (fact <= 1.0) fact = 1.39;
1748       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1749       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1750     }
1751   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1752 
1753 #if defined(PETSC_HAVE_PASTIX)
1754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1755 					   "MatGetFactor_mpisbaij_pastix",
1756 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1757 #endif
1758 #if defined(PETSC_HAVE_MUMPS)
1759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1760                                      "MatGetFactor_mpisbaij_mumps",
1761                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1762 #endif
1763 #if defined(PETSC_HAVE_SPOOLES)
1764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1765                                      "MatGetFactor_mpisbaij_spooles",
1766                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1767 #endif
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1769                                      "MatStoreValues_MPISBAIJ",
1770                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1772                                      "MatRetrieveValues_MPISBAIJ",
1773                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1775                                      "MatGetDiagonalBlock_MPISBAIJ",
1776                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1778                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1779                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1780   B->symmetric                  = PETSC_TRUE;
1781   B->structurally_symmetric     = PETSC_TRUE;
1782   B->symmetric_set              = PETSC_TRUE;
1783   B->structurally_symmetric_set = PETSC_TRUE;
1784   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 EXTERN_C_END
1788 
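/*
   A minimal creation sketch using the constructor above (comm, m, bs,
   d_nz and o_nz are placeholders for the caller's values):

.vb
      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(B,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
*/
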
1789 /*MC
1790    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1791 
1792    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1793    and MATMPISBAIJ otherwise.
1794 
1795    Options Database Keys:
1796 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1797 
1798   Level: beginner
1799 
1800 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1801 M*/
1802 
1803 EXTERN_C_BEGIN
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatCreate_SBAIJ"
1806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    size;
1810 
1811   PetscFunctionBegin;
1812   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1813   if (size == 1) {
1814     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1815   } else {
1816     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 EXTERN_C_END
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1824 /*@C
1825    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1826    the user should preallocate the matrix storage by setting the parameters
1827    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1828    performance can be increased by more than a factor of 50.
1829 
1830    Collective on Mat
1831 
1832    Input Parameters:
1833 +  A - the matrix
1834 .  bs   - size of block
1835 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1836            submatrix  (same for all local rows)
1837 .  d_nnz - array containing the number of block nonzeros in the various block rows
1838            in the upper triangular and diagonal part of the diagonal portion of the local
1839            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1840            for the diagonal entry even if it is zero.
1841 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1842            submatrix (same for all local rows).
1843 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1844            off-diagonal portion of the local submatrix (possibly different for
1845            each block row) or PETSC_NULL.
1846 
1847 
1848    Options Database Keys:
1849 +   -mat_no_unroll - uses code that does not unroll the loops in the
1850                      block calculations (much slower)
1851 -   -mat_block_size - size of the blocks to use
1852 
1853    Notes:
1854 
1855    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1856    then it must be used on all processors that share the object for that argument.
1857 
1858    If the *_nnz parameter is given then the *_nz parameter is ignored
1859 
1860    Storage Information:
1861    For a square global matrix we define each processor's diagonal portion
1862    to be its local rows and the corresponding columns (a square submatrix);
1863    each processor's off-diagonal portion encompasses the remainder of the
1864    local matrix (a rectangular submatrix).
1865 
1866    The user can specify preallocated storage for the diagonal part of
1867    the local submatrix with either d_nz or d_nnz (not both).  Set
1868    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1869    memory allocation.  Likewise, specify preallocated storage for the
1870    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1871 
1872    You can call MatGetInfo() to get information on how effective the preallocation was;
1873    for example, see the fields mallocs, nz_allocated, nz_used and nz_unneeded.
1874    You can also run with the option -info and look for messages with the string
1875    malloc in them to see if additional memory allocation was needed.
1876 
1877    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1878    the figure below we depict these three local rows and all columns (0-11).
1879 
1880 .vb
1881            0 1 2 3 4 5 6 7 8 9 10 11
1882           -------------------
1883    row 3  |  o o o d d d o o o o o o
1884    row 4  |  o o o d d d o o o o o o
1885    row 5  |  o o o d d d o o o o o o
1886           -------------------
1887 .ve
1888 
1889    Thus, any entries in the d locations are stored in the d (diagonal)
1890    submatrix, and any entries in the o locations are stored in the
1891    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1892    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1893 
1894    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1895    plus the diagonal part of the d matrix,
1896    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1897    In general, for PDE problems in which most nonzeros are near the diagonal,
1898    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1899    or you will get TERRIBLE performance; see the users' manual chapter on
1900    matrices.
1901 
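   For example, for the figure above with bs=1 (a sketch; every marked
   location is assumed to hold a nonzero), only entries on or to the right
   of the diagonal are stored, so one could use

.vb
      d_nz = 3; o_nz = 6;
.ve

   or, more precisely, d_nnz[] = {3,2,1} and o_nnz[] = {6,6,6} for rows
   3, 4 and 5.
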
1902    Level: intermediate
1903 
1904 .keywords: matrix, block, aij, compressed row, sparse, parallel
1905 
1906 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1907 @*/
1908 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1909 {
1910   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1911 
1912   PetscFunctionBegin;
1913   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1914   if (f) {
1915     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
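/*
   Note on the dispatch above: the "MatMPISBAIJSetPreallocation_C" function
   is composed on the object only for MPISBAIJ matrices, so on other types
   the query returns f = PETSC_NULL and the call is a quiet no-op. A sketch
   of type-independent usage that relies on this:

.vb
      MatSetType(B,MATSBAIJ);
      MatMPISBAIJSetPreallocation(B,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
      MatSeqSBAIJSetPreallocation(B,bs,d_nz,PETSC_NULL);
.ve

   where whichever routine matches the actual type takes effect.
*/
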
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatCreateMPISBAIJ"
1922 /*@C
1923    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1924    (block compressed row).  For good matrix assembly performance
1925    the user should preallocate the matrix storage by setting the parameters
1926    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1927    performance can be increased by more than a factor of 50.
1928 
1929    Collective on MPI_Comm
1930 
1931    Input Parameters:
1932 +  comm - MPI communicator
1933 .  bs   - size of block
1934 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1935            This value should be the same as the local size used in creating the
1936            y vector for the matrix-vector product y = Ax.
1937 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1938            This value should be the same as the local size used in creating the
1939            x vector for the matrix-vector product y = Ax.
1940 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1941 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1942 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1943            submatrix  (same for all local rows)
1944 .  d_nnz - array containing the number of block nonzeros in the various block rows
1945            in the upper triangular and diagonal part of the diagonal portion of the local
1946            submatrix (possibly different for each block row) or PETSC_NULL.
1947            You must leave room for the diagonal entry even if it is zero.
1948 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1949            submatrix (same for all local rows).
1950 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1951            off-diagonal portion of the local submatrix (possibly different for
1952            each block row) or PETSC_NULL.
1953 
1954    Output Parameter:
1955 .  A - the matrix
1956 
1957    Options Database Keys:
1958 +   -mat_no_unroll - uses code that does not unroll the loops in the
1959                      block calculations (much slower)
1960 .   -mat_block_size - size of the blocks to use
1961 -   -mat_mpi - use the parallel matrix data structures even on one processor
1962                (defaults to using SeqBAIJ format on one processor)
1963 
1964    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1965    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1966    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1967 
1968    Notes:
1969    The number of rows and columns must be divisible by blocksize.
1970    This matrix type does not support complex Hermitian operations.
1971 
1972    The user MUST specify either the local or global matrix dimensions
1973    (possibly both).
1974 
1975    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1976    then it must be used on all processors that share the object for that argument.
1977 
1978    If the *_nnz parameter is given then the *_nz parameter is ignored
1979 
1980    Storage Information:
1981    For a square global matrix we define each processor's diagonal portion
1982    to be its local rows and the corresponding columns (a square submatrix);
1983    each processor's off-diagonal portion encompasses the remainder of the
1984    local matrix (a rectangular submatrix).
1985 
1986    The user can specify preallocated storage for the diagonal part of
1987    the local submatrix with either d_nz or d_nnz (not both).  Set
1988    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1989    memory allocation.  Likewise, specify preallocated storage for the
1990    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1991 
1992    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1993    the figure below we depict these three local rows and all columns (0-11).
1994 
1995 .vb
1996            0 1 2 3 4 5 6 7 8 9 10 11
1997           -------------------
1998    row 3  |  o o o d d d o o o o o o
1999    row 4  |  o o o d d d o o o o o o
2000    row 5  |  o o o d d d o o o o o o
2001           -------------------
2002 .ve
2003 
2004    Thus, any entries in the d locations are stored in the d (diagonal)
2005    submatrix, and any entries in the o locations are stored in the
2006    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2007    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2008 
2009    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2010    plus the diagonal part of the d matrix,
2011    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2012    In general, for PDE problems in which most nonzeros are near the diagonal,
2013    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2014    or you will get TERRIBLE performance; see the users' manual chapter on
2015    matrices.
2016 
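   Example usage (a sketch with placeholder sizes):

.vb
      Mat A;
      MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                        3,PETSC_NULL,1,PETSC_NULL,&A);
.ve

   creates a square matrix with M/2 global block rows, block size 2, and
   3 (resp. 1) block nonzeros preallocated per block row of the diagonal
   (resp. off-diagonal) part.
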
2017    Level: intermediate
2018 
2019 .keywords: matrix, block, aij, compressed row, sparse, parallel
2020 
2021 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2022 @*/
2023 
2024 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2025 {
2026   PetscErrorCode ierr;
2027   PetscMPIInt    size;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2031   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2032   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2033   if (size > 1) {
2034     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2035     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2036   } else {
2037     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2038     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2039   }
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2046 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2047 {
2048   Mat            mat;
2049   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2050   PetscErrorCode ierr;
2051   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2052   PetscScalar    *array;
2053 
2054   PetscFunctionBegin;
2055   *newmat       = 0;
2056   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2057   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2058   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2059   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2060   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2061   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2062 
2063   mat->factor       = matin->factor;
2064   mat->preallocated = PETSC_TRUE;
2065   mat->assembled    = PETSC_TRUE;
2066   mat->insertmode   = NOT_SET_VALUES;
2067 
2068   a = (Mat_MPISBAIJ*)mat->data;
2069   a->bs2   = oldmat->bs2;
2070   a->mbs   = oldmat->mbs;
2071   a->nbs   = oldmat->nbs;
2072   a->Mbs   = oldmat->Mbs;
2073   a->Nbs   = oldmat->Nbs;
2074 
2075 
2076   a->size         = oldmat->size;
2077   a->rank         = oldmat->rank;
2078   a->donotstash   = oldmat->donotstash;
2079   a->roworiented  = oldmat->roworiented;
2080   a->rowindices   = 0;
2081   a->rowvalues    = 0;
2082   a->getrowactive = PETSC_FALSE;
2083   a->barray       = 0;
2084   a->rstartbs    = oldmat->rstartbs;
2085   a->rendbs      = oldmat->rendbs;
2086   a->cstartbs    = oldmat->cstartbs;
2087   a->cendbs      = oldmat->cendbs;
2088 
2089   /* hash table stuff */
2090   a->ht           = 0;
2091   a->hd           = 0;
2092   a->ht_size      = 0;
2093   a->ht_flag      = oldmat->ht_flag;
2094   a->ht_fact      = oldmat->ht_fact;
2095   a->ht_total_ct  = 0;
2096   a->ht_insert_ct = 0;
2097 
2098   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2099   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2100   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2101   if (oldmat->colmap) {
2102 #if defined (PETSC_USE_CTABLE)
2103     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2104 #else
2105     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2106     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2107     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2108 #endif
2109   } else a->colmap = 0;
2110 
2111   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2112     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2113     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2115   } else a->garray = 0;
2116 
2117   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2119   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2121 
2122   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2124   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2126 
2127   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2128   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2132   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2133   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2134   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2140 
2141   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2142   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2143   a->sMvctx = oldmat->sMvctx;
2144   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2145 
2146   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2148   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2150   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2151   *newmat = mat;
2152   PetscFunctionReturn(0);
2153 }
2154 
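/*
   Usage sketch for the routine above (illustrative):

.vb
      Mat Anew;
      MatDuplicate(A,MAT_COPY_VALUES,&Anew);
.ve

   Note that the slvec0/slvec1 work vectors are rebuilt on the duplicate,
   while the sMvctx scatter is shared by reference rather than copied.
*/
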
2155 #include "petscsys.h"
2156 
2157 #undef __FUNCT__
2158 #define __FUNCT__ "MatLoad_MPISBAIJ"
2159 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2160 {
2161   Mat            A;
2162   PetscErrorCode ierr;
2163   PetscInt       i,nz,j,rstart,rend;
2164   PetscScalar    *vals,*buf;
2165   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2166   MPI_Status     status;
2167   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2168   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2169   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2170   PetscInt       bs=1,Mbs,mbs,extra_rows;
2171   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2172   PetscInt       dcount,kmax,k,nzcount,tmp;
2173   int            fd;
2174 
2175   PetscFunctionBegin;
2176   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2177     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2178   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2179 
2180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2182   if (!rank) {
2183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2184     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2185     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2186     if (header[3] < 0) {
2187       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2188     }
2189   }
2190 
2191   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2192   M = header[1]; N = header[2];
2193 
2194   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2195 
2196   /*
2197      This code adds extra rows to make sure the number of rows is
2198      divisible by the blocksize
2199   */
2200   Mbs        = M/bs;
2201   extra_rows = bs - M + bs*(Mbs);
2202   if (extra_rows == bs) extra_rows = 0;
2203   else                  Mbs++;
2204   if (extra_rows &&!rank) {
2205     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2206   }
2207 
2208   /* determine ownership of all rows */
2209   mbs        = Mbs/size + ((Mbs % size) > rank);
2210   m          = mbs*bs;
2211   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2212   browners   = rowners + size + 1;
2213   mmbs       = PetscMPIIntCast(mbs);
2214   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2215   rowners[0] = 0;
2216   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2217   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2218   rstart = rowners[rank];
2219   rend   = rowners[rank+1];
2220 
2221   /* distribute row lengths to all processors */
2222   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2223   if (!rank) {
2224     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2225     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2226     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2227     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2228     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2229     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2231   } else {
2232     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2233   }
2234 
2235   if (!rank) {   /* procs[0] */
2236     /* calculate the number of nonzeros on each processor */
2237     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2238     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2239     for (i=0; i<size; i++) {
2240       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2241         procsnz[i] += rowlengths[j];
2242       }
2243     }
2244     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2245 
2246     /* determine max buffer needed and allocate it */
2247     maxnz = 0;
2248     for (i=0; i<size; i++) {
2249       maxnz = PetscMax(maxnz,procsnz[i]);
2250     }
2251     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2252 
2253     /* read in my part of the matrix column indices  */
2254     nz     = procsnz[0];
2255     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2256     mycols = ibuf;
2257     if (size == 1)  nz -= extra_rows;
2258     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2259     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2260 
2261     /* read in the parts for all other procs (except the last) and ship them off */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2265       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2266     }
2267     /* read in the stuff for the last proc */
2268     if (size != 1) {
2269       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2272       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2273     }
2274     ierr = PetscFree(cols);CHKERRQ(ierr);
2275   } else {  /* procs[i], i>0 */
2276     /* determine buffer space needed for message */
2277     nz = 0;
2278     for (i=0; i<m; i++) {
2279       nz += locrowlens[i];
2280     }
2281     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2282     mycols = ibuf;
2283     /* receive message of column indices*/
2284     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2285     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2287   }
2288 
2289   /* loop over local rows, determining number of off diagonal entries */
2290   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2291   odlens   = dlens + (rend-rstart);
2292   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2293   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2294   masked1  = mask    + Mbs;
2295   masked2  = masked1 + Mbs;
2296   rowcount = 0; nzcount = 0;
2297   for (i=0; i<mbs; i++) {
2298     dcount  = 0;
2299     odcount = 0;
2300     for (j=0; j<bs; j++) {
2301       kmax = locrowlens[rowcount];
2302       for (k=0; k<kmax; k++) {
2303         tmp = mycols[nzcount++]/bs; /* block col. index */
2304         if (!mask[tmp]) {
2305           mask[tmp] = 1;
2306           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2307           else masked1[dcount++] = tmp; /* entry in diag portion */
2308         }
2309       }
2310       rowcount++;
2311     }
2312 
2313     dlens[i]  = dcount;  /* d_nzz[i] */
2314     odlens[i] = odcount; /* o_nzz[i] */
2315 
2316     /* zero out the mask elements we set */
2317     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2318     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2319   }
2320 
2321   /* create our matrix */
2322   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2323   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2324   ierr = MatSetType(A,type);CHKERRQ(ierr);
2325   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2326   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2327 
2328   if (!rank) {
2329     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2330     /* read in my part of the matrix numerical values  */
2331     nz = procsnz[0];
2332     vals = buf;
2333     mycols = ibuf;
2334     if (size == 1)  nz -= extra_rows;
2335     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2336     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2337 
2338     /* insert into matrix */
2339     jj      = rstart*bs;
2340     for (i=0; i<m; i++) {
2341       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2342       mycols += locrowlens[i];
2343       vals   += locrowlens[i];
2344       jj++;
2345     }
2346 
2347     /* read in other processors (except the last one) and ship out */
2348     for (i=1; i<size-1; i++) {
2349       nz   = procsnz[i];
2350       vals = buf;
2351       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2352       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2353     }
2354     /* the last proc */
2355     if (size != 1){
2356       nz   = procsnz[size-1] - extra_rows; /* i == size-1 here; the extra rows are not on the disk */
2357       vals = buf;
2358       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2359       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2360       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2361     }
2362     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2363 
2364   } else {
2365     /* receive numeric values */
2366     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2367 
2368     /* receive message of values*/
2369     vals   = buf;
2370     mycols = ibuf;
2371     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2372     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2373     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2374 
2375     /* insert into matrix */
2376     jj      = rstart*bs;
2377     for (i=0; i<m; i++) {
2378       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2379       mycols += locrowlens[i];
2380       vals   += locrowlens[i];
2381       jj++;
2382     }
2383   }
2384 
2385   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2386   ierr = PetscFree(buf);CHKERRQ(ierr);
2387   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2388   ierr = PetscFree(rowners);CHKERRQ(ierr);
2389   ierr = PetscFree(dlens);CHKERRQ(ierr);
2390   ierr = PetscFree(mask);CHKERRQ(ierr);
2391   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   *newmat = A;
2394   PetscFunctionReturn(0);
2395 }
2396 
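/*
   Loading sketch (assumes a PETSc binary file written for a square
   matrix; the file name is a placeholder):

.vb
      PetscViewer viewer;
      Mat         A;
      PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);
      MatLoad(viewer,MATMPISBAIJ,&A);
      PetscViewerDestroy(viewer);
.ve

   The option -matload_block_size sets bs; as coded above, the matrix is
   padded with identity rows when the global dimension is not divisible
   by bs.
*/
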
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2399 /*XXXXX@
2400    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2401 
2402    Input Parameters:
2403 .  mat  - the matrix
2404 .  fact - factor
2405 
2406    Collective on Mat
2407 
2408    Level: advanced
2409 
2410   Notes:
2411    This can also be set by the command line option: -mat_use_hash_table fact
2412 
2413 .keywords: matrix, hashtable, factor, HT
2414 
2415 .seealso: MatSetOption()
2416 @XXXXX*/
2417 
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2421 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2422 {
2423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2424   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2425   PetscReal      atmp;
2426   PetscReal      *work,*svalues,*rvalues;
2427   PetscErrorCode ierr;
2428   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2429   PetscMPIInt    rank,size;
2430   PetscInt       *rowners_bs,dest,count,source;
2431   PetscScalar    *va;
2432   MatScalar      *ba;
2433   MPI_Status     stat;
2434 
2435   PetscFunctionBegin;
2436   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2437   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2438   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2439 
2440   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2441   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2442 
2443   bs   = A->rmap->bs;
2444   mbs  = a->mbs;
2445   Mbs  = a->Mbs;
2446   ba   = b->a;
2447   bi   = b->i;
2448   bj   = b->j;
2449 
2450   /* find ownerships */
2451   rowners_bs = A->rmap->range;
2452 
2453   /* each proc creates an array to be distributed */
2454   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2455   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2456 
2457   /* row_max for B */
2458   if (rank != size-1){
2459     for (i=0; i<mbs; i++) {
2460       ncols = bi[1] - bi[0]; bi++;
2461       brow  = bs*i;
2462       for (j=0; j<ncols; j++){
2463         bcol = bs*(*bj);
2464         for (kcol=0; kcol<bs; kcol++){
2465           col = bcol + kcol;                 /* local col index */
2466           col += rowners_bs[rank+1];      /* global col index */
2467           for (krow=0; krow<bs; krow++){
2468             atmp = PetscAbsScalar(*ba); ba++;
2469             row = brow + krow;    /* local row index */
2470             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2471             if (work[col] < atmp) work[col] = atmp;
2472           }
2473         }
2474         bj++;
2475       }
2476     }
2477 
2478     /* send values to its owners */
2479     for (dest=rank+1; dest<size; dest++){
2480       svalues = work + rowners_bs[dest];
2481       count   = rowners_bs[dest+1]-rowners_bs[dest];
2482       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2483     }
2484   }
2485 
2486   /* receive values */
2487   if (rank){
2488     rvalues = work;
2489     count   = rowners_bs[rank+1]-rowners_bs[rank];
2490     for (source=0; source<rank; source++){
2491       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2492       /* process values */
2493       for (i=0; i<count; i++){
2494         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2495       }
2496     }
2497   }
2498 
2499   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2500   ierr = PetscFree(work);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
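/*
   Usage sketch (the idx argument must be PETSC_NULL for this format, as
   checked above):

.vb
      Vec v;
      MatGetVecs(A,&v,PETSC_NULL);
      MatGetRowMaxAbs(A,v,PETSC_NULL);
.ve

   Because only the upper triangle is stored, each off-diagonal entry
   contributes to the maximum of both its row and its column, which is why
   the routine sends the accumulated column maxima to the owning processes.
*/
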
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatSOR_MPISBAIJ"
2506 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2507 {
2508   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2509   PetscErrorCode ierr;
2510   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2511   PetscScalar    *x,*b,*ptr,*from;
2512   Vec            bb1;
2513 
2514   PetscFunctionBegin;
2515   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2516   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2517 
2518   if (flag == SOR_APPLY_UPPER) {
2519     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2520     PetscFunctionReturn(0);
2521   }
2522 
2523   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2524     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2525       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2526       its--;
2527     }
2528 
2529     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2530     while (its--){
2531 
2532       /* lower triangular part: slvec0b = - B^T*xx */
2533       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2534 
2535       /* copy xx into slvec0a */
2536       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2538       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2539       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2540 
2541       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2542 
2543       /* copy bb into slvec1a */
2544       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2546       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2547       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2548 
2549       /* set slvec1b = 0 */
2550       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2551 
2552       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2553       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2554       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2555       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2556 
2557       /* upper triangular part: bb1 = bb1 - B*x */
2558       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2559 
2560       /* local diagonal sweep */
2561       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2562     }
2563     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2564   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2565     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2566   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2567     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2568   } else if (flag & SOR_EISENSTAT) {
2569     Vec               xx1;
2570     PetscTruth        hasop;
2571     const PetscScalar *diag;
2572     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2573     PetscInt          i,n;
2574 
2575     if (!mat->xx1) {
2576       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2577       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2578     }
2579     xx1 = mat->xx1;
2580     bb1 = mat->bb1;
2581 
2582     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2583 
2584     if (!mat->diag) {
2585       /* this is wrong for same matrix with new nonzero values */
2586       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2587       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2588     }
2589     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2590 
2591     if (hasop) {
2592       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2593       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2594     } else {
2595       /*
2596           These two lines are replaced by code that may be a bit faster for a good compiler
2597       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2598       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2599       */
2600       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2601       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2602       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2603       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2604       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2605       if (omega == 1.0) {
2606 	for (i=0; i<n; i++) {
2607 	  sl[i] = b[i] - diag[i]*x[i];
2608 	}
2609         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2610       } else {
2611 	for (i=0; i<n; i++) {
2612 	  sl[i] = b[i] + scale*diag[i]*x[i];
2613 	}
2614         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2615       }
2616       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2617       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2618       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2619       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2620     }
2621 
2622     /* multiply off-diagonal portion of matrix */
2623     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2624     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2625     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2626     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2627     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2628     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2629     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2630     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2631     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2632     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2633 
2634     /* local sweep */
2635     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2636     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2637   } else {
2638     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2639   }
2640   PetscFunctionReturn(0);
2641 }
2642 
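/*
   Sketch of the algebra in the symmetric sweep above (B is the stored
   upper off-diagonal block, so B*x plus B^T*x is the action of the full
   off-diagonal part of the symmetric matrix):

.vb
      bb1 = bb - B*x - B^T*x
.ve

   The B^T*x contributions are sent to their owners with a single forward
   scatter of slvec0 = -[x; B^T*x] into slvec1, after which only the local
   diagonal block is relaxed with SOR_SYMMETRIC_SWEEP.
*/
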
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2645 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2646 {
2647   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2648   PetscErrorCode ierr;
2649   Vec            lvec1,bb1;
2650 
2651   PetscFunctionBegin;
2652   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2653   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2654 
2655   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2656     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2657       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2658       its--;
2659     }
2660 
2661     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2662     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2663     while (its--){
2664       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2665 
2666       /* lower triangular part: bb1 = bb - B^T*xx */
2667       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2668       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2669 
2670       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2671       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2672       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2673 
2674       /* upper triangular part: bb1 = bb1 - B*x */
2675       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2676       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2677 
2678       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2679 
2680       /* diagonal sweep */
2681       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2682     }
2683     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2684     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2685   } else {
2686     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
2691