/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fcc2139e2b6db8cad23d587d944bcbf09bc8202e) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off-processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
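
/*
   Usage sketch (illustrative only, not part of this file): because of the
   restriction documented above, a caller fills only the upper triangle, e.g.

      PetscInt    row = 0, cols[2] = {0, 2};
      PetscScalar vals[2] = {4.0, -1.0};
      ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Values with (block) row > (block) column are either dropped or flagged as an
   error, depending on the MAT_IGNORE_LOWER_TRIANGULAR option.
*/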

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
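
/*
   Usage sketch (illustrative): inserting one bs-by-bs block by global block
   indices; with bs = 2, block (1,2) covers point rows 2..3 and columns 4..5:

      PetscInt    brow = 1, bcol = 2;
      PetscScalar blk[4] = {4.0, -1.0, -1.0, 4.0};
      ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);

   As above, only blocks with brow <= bcol are accepted.
*/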

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]); /* entries of B are counted twice by symmetry */
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
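
/*
   Note on the Frobenius reduction above (a sketch of the reasoning, with D_p the
   local diagonal block part stored in baij->A and U_p the strictly off-diagonal
   part stored in baij->B):

      ||A||_F^2 = sum_p ( ||D_p||_F^2 + 2*||U_p||_F^2 )

   MatNorm() on the SeqSBAIJ block already accounts for the symmetry of D_p,
   while every entry of U_p also appears transposed in the unstored lower
   triangle, hence *norm = sqrt(sum[0] + 2*sum[1]).
*/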

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
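
/*
   Typical assembly sequence driving the two routines above (illustrative):

      ierr = MatSetValues(mat,m,rows,n,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Off-process entries travel through mat->stash and mat->bstash between
   MatAssemblyBegin() and MatAssemblyEnd(), which is where the loops above
   replay them with MatSetValues_MPISBAIJ()/MatSetValuesBlocked_MPISBAIJ().
*/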

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
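
/*
   A sketch of how the multiply above forms y = A*x from the upper triangle only:

      slvec1a = D*x       (local symmetric diagonal block)
      slvec0b = B^T*x     (contribution of the unstored lower triangle)
      scatter slvec0 = [x, B^T*x] into slvec1 = [D*x + ..., x_ghost] with ADD_VALUES
      y       = B*x_ghost + slvec1a

   so each off-diagonal block is applied once as B and once as B^T, and only one
   scatter (sMvctx) is needed per product.
*/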

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
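
/*
   Usage sketch (illustrative): for SBAIJ matrices a row may only be examined
   after announcing that just the stored upper triangular part is wanted:

      ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
      ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ... examine cols[0..nz-1] and vals[0..nz-1] ...
      ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/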

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
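
/*
   Illustrative settings handled by the switch above:

      ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);  (assembled matrices only)

   The first silently drops user-supplied lower triangular entries instead of
   raising an error; the second switches the multiply to
   MatMult_MPISBAIJ_Hermitian().
*/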

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
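
/*
   Usage sketch (illustrative): symmetric scaling must use the same vector on
   both sides so that the scaled matrix  diag(d) A diag(d)  stays symmetric:

      ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/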
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1310 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatEqual_MPISBAIJ"
1324 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1325 {
1326   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1327   Mat            a,b,c,d;
1328   PetscTruth     flg;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   a = matA->A; b = matA->B;
1333   c = matB->A; d = matB->B;
1334 
1335   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1336   if (flg) {
1337     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1338   }
1339   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPISBAIJ"
1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1349   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1350 
1351   PetscFunctionBegin;
1352   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1353   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1354     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1355     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1359     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1367 {
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
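  /* The negated block size tells MatMPISBAIJSetPreallocation_MPISBAIJ() that the
     value is only a default, which -mat_block_size may still override. */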
1371   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1378 {
1379   PetscErrorCode ierr;
1380   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1381   PetscBLASInt   bnz,one=1;
1382   Mat_SeqSBAIJ   *xa,*ya;
1383   Mat_SeqBAIJ    *xb,*yb;
1384 
1385   PetscFunctionBegin;
1386   if (str == SAME_NONZERO_PATTERN) {
1387     PetscScalar alpha = a;
1388     xa = (Mat_SeqSBAIJ *)xx->A->data;
1389     ya = (Mat_SeqSBAIJ *)yy->A->data;
1390     bnz = PetscBLASIntCast(xa->nz);
1391     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1392     xb = (Mat_SeqBAIJ *)xx->B->data;
1393     yb = (Mat_SeqBAIJ *)yy->B->data;
1394     bnz = PetscBLASIntCast(xb->nz);
1395     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1396   } else {
1397     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1398     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1406 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1407 {
1408   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1409   PetscInt        rbs,cbs;
1410   PetscErrorCode  ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1414   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1417   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ row block size %d",bs,rbs);
1418   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ column block size %d",bs,cbs);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1424 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1425 {
1426   PetscErrorCode ierr;
1427   PetscInt       i;
1428   PetscTruth     flg;
1429 
1430   PetscFunctionBegin;
1431   for (i=0; i<n; i++) {
1432     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1433     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1434   }
1435   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 
1440 /* -------------------------------------------------------------------*/
1441 static struct _MatOps MatOps_Values = {
1442        MatSetValues_MPISBAIJ,
1443        MatGetRow_MPISBAIJ,
1444        MatRestoreRow_MPISBAIJ,
1445        MatMult_MPISBAIJ,
1446 /* 4*/ MatMultAdd_MPISBAIJ,
1447        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones */
1448        MatMultAdd_MPISBAIJ,
1449        0,
1450        0,
1451        0,
1452 /*10*/ 0,
1453        0,
1454        0,
1455        MatSOR_MPISBAIJ,
1456        MatTranspose_MPISBAIJ,
1457 /*15*/ MatGetInfo_MPISBAIJ,
1458        MatEqual_MPISBAIJ,
1459        MatGetDiagonal_MPISBAIJ,
1460        MatDiagonalScale_MPISBAIJ,
1461        MatNorm_MPISBAIJ,
1462 /*20*/ MatAssemblyBegin_MPISBAIJ,
1463        MatAssemblyEnd_MPISBAIJ,
1464        MatSetOption_MPISBAIJ,
1465        MatZeroEntries_MPISBAIJ,
1466 /*24*/ 0,
1467        0,
1468        0,
1469        0,
1470        0,
1471 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1472        0,
1473        0,
1474        0,
1475        0,
1476 /*34*/ MatDuplicate_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*39*/ MatAXPY_MPISBAIJ,
1482        MatGetSubMatrices_MPISBAIJ,
1483        MatIncreaseOverlap_MPISBAIJ,
1484        MatGetValues_MPISBAIJ,
1485        MatCopy_MPISBAIJ,
1486 /*44*/ 0,
1487        MatScale_MPISBAIJ,
1488        0,
1489        0,
1490        0,
1491 /*49*/ MatSetBlockSize_MPISBAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*54*/ 0,
1497        0,
1498        MatSetUnfactored_MPISBAIJ,
1499        0,
1500        MatSetValuesBlocked_MPISBAIJ,
1501 /*59*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*64*/ 0,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*74*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*79*/ 0,
1522        0,
1523        0,
1524        0,
1525        MatLoad_MPISBAIJ,
1526 /*84*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*89*/ 0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*94*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*99*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*104*/0,
1547        MatRealPart_MPISBAIJ,
1548        MatImaginaryPart_MPISBAIJ,
1549        MatGetRowUpperTriangular_MPISBAIJ,
1550        MatRestoreRowUpperTriangular_MPISBAIJ,
1551 /*109*/0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*114*/0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*119*/0,
1562        0,
1563        0,
1564        0
1565 };
1566 
1567 
1568 EXTERN_C_BEGIN
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1571 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1572 {
1573   PetscFunctionBegin;
1574   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1575   *iscopy = PETSC_FALSE;
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1583 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1584 {
1585   Mat_MPISBAIJ   *b;
1586   PetscErrorCode ierr;
1587   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1588 
1589   PetscFunctionBegin;
1590   if (bs < 0){
1591     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1592       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1593     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1594     bs   = PetscAbs(bs);
1595   }
1596   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1597   bs = newbs;
1598 
1599   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1600   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1601   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1602   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1603 
1604   B->rmap->bs = B->cmap->bs = bs;
1605   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1606   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1607 
1608   if (d_nnz) {
1609     for (i=0; i<B->rmap->n/bs; i++) {
1610       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1611     }
1612   }
1613   if (o_nnz) {
1614     for (i=0; i<B->rmap->n/bs; i++) {
1615       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1616     }
1617   }
1618 
1619   b   = (Mat_MPISBAIJ*)B->data;
1620   mbs = B->rmap->n/bs;
1621   Mbs = B->rmap->N/bs;
1622   if (mbs*bs != B->rmap->n) {
1623     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1624   }
1625 
1626   B->rmap->bs  = bs;
1627   b->bs2 = bs*bs;
1628   b->mbs = mbs;
1629   b->nbs = mbs;
1630   b->Mbs = Mbs;
1631   b->Nbs = Mbs;
1632 
1633   for (i=0; i<=b->size; i++) {
1634     b->rangebs[i] = B->rmap->range[i]/bs;
1635   }
1636   b->rstartbs = B->rmap->rstart/bs;
1637   b->rendbs   = B->rmap->rend/bs;
1638 
1639   b->cstartbs = B->cmap->rstart/bs;
1640   b->cendbs   = B->cmap->rend/bs;
1641 
1642   if (!B->preallocated) {
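    /* A holds the diagonal block (upper triangle only, stored as SeqSBAIJ);
       B holds the off-diagonal block entries and uses the nonsymmetric
       SeqBAIJ format. */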
1643     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1644     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1645     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1646     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1647     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1649     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1650     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1651     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1652   }
1653 
1654   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1655   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1656   B->preallocated = PETSC_TRUE;
1657   PetscFunctionReturn(0);
1658 }
1659 EXTERN_C_END
1660 
1661 EXTERN_C_BEGIN
1662 #if defined(PETSC_HAVE_MUMPS)
1663 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1664 #endif
1665 #if defined(PETSC_HAVE_SPOOLES)
1666 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1667 #endif
1668 #if defined(PETSC_HAVE_PASTIX)
1669 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1670 #endif
1671 EXTERN_C_END
1672 
1673 /*MC
1674    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1675    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1676 
1677    Options Database Keys:
1678 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1679 
1680   Level: beginner
1681 
1682 .seealso: MatCreateMPISBAIJ
1683 M*/
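/*
   A minimal creation sketch (the sizes and preallocation values below are
   illustrative assumptions): select the type at run time with -mat_type mpisbaij
   and preallocate afterwards.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,1,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/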
1684 
1685 EXTERN_C_BEGIN
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreate_MPISBAIJ"
1688 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1689 {
1690   Mat_MPISBAIJ   *b;
1691   PetscErrorCode ierr;
1692   PetscTruth     flg;
1693 
1694   PetscFunctionBegin;
1695 
1696   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1697   B->data = (void*)b;
1698   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1699 
1700   B->ops->destroy    = MatDestroy_MPISBAIJ;
1701   B->ops->view       = MatView_MPISBAIJ;
1702   B->mapping         = 0;
1703   B->assembled       = PETSC_FALSE;
1704 
1705   B->insertmode = NOT_SET_VALUES;
1706   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1707   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1708 
1709   /* build local table of row and column ownerships */
1710   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1711 
1712   /* build cache for off array entries formed */
1713   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1714   b->donotstash  = PETSC_FALSE;
1715   b->colmap      = PETSC_NULL;
1716   b->garray      = PETSC_NULL;
1717   b->roworiented = PETSC_TRUE;
1718 
1719   /* stuff used in block assembly */
1720   b->barray       = 0;
1721 
1722   /* stuff used for matrix vector multiply */
1723   b->lvec         = 0;
1724   b->Mvctx        = 0;
1725   b->slvec0       = 0;
1726   b->slvec0b      = 0;
1727   b->slvec1       = 0;
1728   b->slvec1a      = 0;
1729   b->slvec1b      = 0;
1730   b->sMvctx       = 0;
1731 
1732   /* stuff for MatGetRow() */
1733   b->rowindices   = 0;
1734   b->rowvalues    = 0;
1735   b->getrowactive = PETSC_FALSE;
1736 
1737   /* hash table stuff */
1738   b->ht           = 0;
1739   b->hd           = 0;
1740   b->ht_size      = 0;
1741   b->ht_flag      = PETSC_FALSE;
1742   b->ht_fact      = 0;
1743   b->ht_total_ct  = 0;
1744   b->ht_insert_ct = 0;
1745 
1746   b->in_loc       = 0;
1747   b->v_loc        = 0;
1748   b->n_loc        = 0;
1749   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1750     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1751     if (flg) {
1752       PetscReal fact = 1.39;
1753       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1754       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1755       if (fact <= 1.0) fact = 1.39;
1756       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1757       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1758     }
1759   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1760 
1761 #if defined(PETSC_HAVE_PASTIX)
1762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1763 					   "MatGetFactor_mpisbaij_pastix",
1764 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1765 #endif
1766 #if defined(PETSC_HAVE_MUMPS)
1767   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1768                                      "MatGetFactor_sbaij_mumps",
1769                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1770 #endif
1771 #if defined(PETSC_HAVE_SPOOLES)
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1773                                      "MatGetFactor_mpisbaij_spooles",
1774                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1775 #endif
1776   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1777                                      "MatStoreValues_MPISBAIJ",
1778                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1779   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1780                                      "MatRetrieveValues_MPISBAIJ",
1781                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1783                                      "MatGetDiagonalBlock_MPISBAIJ",
1784                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1786                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1787                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1788   B->symmetric                  = PETSC_TRUE;
1789   B->structurally_symmetric     = PETSC_TRUE;
1790   B->symmetric_set              = PETSC_TRUE;
1791   B->structurally_symmetric_set = PETSC_TRUE;
1792   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 EXTERN_C_END
1796 
1797 /*MC
1798    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1799 
1800    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1801    and MATMPISBAIJ otherwise.
1802 
1803    Options Database Keys:
1804 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1805 
1806   Level: beginner
1807 
1808 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1809 M*/
1810 
1811 EXTERN_C_BEGIN
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatCreate_SBAIJ"
1814 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1815 {
1816   PetscErrorCode ierr;
1817   PetscMPIInt    size;
1818 
1819   PetscFunctionBegin;
1820   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1821   if (size == 1) {
1822     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1823   } else {
1824     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1825   }
1826   PetscFunctionReturn(0);
1827 }
1828 EXTERN_C_END
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1832 /*@C
1833    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1834    the user should preallocate the matrix storage by setting the parameters
1835    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1836    performance can be increased by more than a factor of 50.
1837 
1838    Collective on Mat
1839 
1840    Input Parameters:
1841 +  A - the matrix
1842 .  bs   - size of block
1843 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1844            submatrix  (same for all local rows)
1845 .  d_nnz - array containing the number of block nonzeros in the various block rows
1846            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1847            (possibly different for each block row) or PETSC_NULL.  You must leave room
1848            for the diagonal entry even if it is zero.
1849 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1850            submatrix (same for all local rows).
1851 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1852            off-diagonal portion of the local submatrix (possibly different for
1853            each block row) or PETSC_NULL.
1854 
1855 
1856    Options Database Keys:
1857 +   -mat_no_unroll - uses code that does not unroll the loops in the
1858                      block calculations (much slower)
1859 -   -mat_block_size - size of the blocks to use
1860 
1861    Notes:
1862 
1863    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1864    then it must be used on all processors that share the object for that argument.
1865 
1866    If the *_nnz parameter is given then the *_nz parameter is ignored
1867 
1868    Storage Information:
1869    For a square global matrix we define each processor's diagonal portion
1870    to be its local rows and the corresponding columns (a square submatrix);
1871    each processor's off-diagonal portion encompasses the remainder of the
1872    local matrix (a rectangular submatrix).
1873 
1874    The user can specify preallocated storage for the diagonal part of
1875    the local submatrix with either d_nz or d_nnz (not both).  Set
1876    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1877    memory allocation.  Likewise, specify preallocated storage for the
1878    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1879 
1880    You can call MatGetInfo() to get information on how effective the preallocation was,
1881    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1882    You can also run with the option -info and look for messages with the string
1883    malloc in them to see if additional memory allocation was needed.
1884 
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887 
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  |  o o o d d d o o o o o o
1892    row 4  |  o o o d d d o o o o o o
1893    row 5  |  o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896 
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1900    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901 
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
1909 
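   As an illustrative sketch (assuming bs=1 and that every location drawn above
   holds a nonzero), the three rows shown could be preallocated on their owning
   process with
.vb
   PetscInt d_nnz[] = {3,2,1};  /* upper triangle plus diagonal of the d block */
   PetscInt o_nnz[] = {6,6,6};  /* o entries to the right of the local columns */
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   (The o locations to the left of the diagonal block lie in the lower triangle
   and are not stored by this format.)
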
1910    Level: intermediate
1911 
1912 .keywords: matrix, block, aij, compressed row, sparse, parallel
1913 
1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1915 @*/
1916 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1917 {
1918   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1919 
1920   PetscFunctionBegin;
1921   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1922   if (f) {
1923     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatCreateMPISBAIJ"
1930 /*@C
1931    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1932    (block compressed row).  For good matrix assembly performance
1933    the user should preallocate the matrix storage by setting the parameters
1934    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1935    performance can be increased by more than a factor of 50.
1936 
1937    Collective on MPI_Comm
1938 
1939    Input Parameters:
1940 +  comm - MPI communicator
1941 .  bs   - size of block
1942 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1943            This value should be the same as the local size used in creating the
1944            y vector for the matrix-vector product y = Ax.
1945 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1946            This value should be the same as the local size used in creating the
1947            x vector for the matrix-vector product y = Ax.
1948 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1949 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1950 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1951            submatrix  (same for all local rows)
1952 .  d_nnz - array containing the number of block nonzeros in the various block rows
1953            in the upper triangular portion of the diagonal portion of the local submatrix
1954            (possibly different for each block row) or PETSC_NULL.
1955            You must leave room for the diagonal entry even if it is zero.
1956 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1957            submatrix (same for all local rows).
1958 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1959            off-diagonal portion of the local submatrix (possibly different for
1960            each block row) or PETSC_NULL.
1961 
1962    Output Parameter:
1963 .  A - the matrix
1964 
1965    Options Database Keys:
1966 +   -mat_no_unroll - uses code that does not unroll the loops in the
1967                      block calculations (much slower)
1968 .   -mat_block_size - size of the blocks to use
1969 -   -mat_mpi - use the parallel matrix data structures even on one processor
1970                (defaults to using SeqSBAIJ format on one processor)
1971 
1972    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1973    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1974    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1975 
1976    Notes:
1977    The number of rows and columns must be divisible by blocksize.
1978    This matrix type does not support complex Hermitian operations.
1979 
1980    The user MUST specify either the local or global matrix dimensions
1981    (possibly both).
1982 
1983    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1984    then it must be used on all processors that share the object for that argument.
1985 
1986    If the *_nnz parameter is given then the *_nz parameter is ignored
1987 
1988    Storage Information:
1989    For a square global matrix we define each processor's diagonal portion
1990    to be its local rows and the corresponding columns (a square submatrix);
1991    each processor's off-diagonal portion encompasses the remainder of the
1992    local matrix (a rectangular submatrix).
1993 
1994    The user can specify preallocated storage for the diagonal part of
1995    the local submatrix with either d_nz or d_nnz (not both).  Set
1996    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1997    memory allocation.  Likewise, specify preallocated storage for the
1998    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1999 
2000    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2001    the figure below we depict these three local rows and all columns (0-11).
2002 
2003 .vb
2004            0 1 2 3 4 5 6 7 8 9 10 11
2005           -------------------
2006    row 3  |  o o o d d d o o o o o o
2007    row 4  |  o o o d d d o o o o o o
2008    row 5  |  o o o d d d o o o o o o
2009           -------------------
2010 .ve
2011 
2012    Thus, any entries in the d locations are stored in the d (diagonal)
2013    submatrix, and any entries in the o locations are stored in the
2014    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2015    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2016 
2017    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2018    plus the diagonal part of the d matrix,
2019    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2020    In general, for PDE problems in which most nonzeros are near the diagonal,
2021    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2022    or you will get TERRIBLE performance; see the users' manual chapter on
2023    matrices.
2024 
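   A sketch of the recommended creation sequence mentioned above, with variables
   as in the parameter list:
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
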
2025    Level: intermediate
2026 
2027 .keywords: matrix, block, aij, compressed row, sparse, parallel
2028 
2029 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2030 @*/
2031 
2032 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2033 {
2034   PetscErrorCode ierr;
2035   PetscMPIInt    size;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2039   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2040   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2041   if (size > 1) {
2042     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2043     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2044   } else {
2045     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2046     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2054 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2055 {
2056   Mat            mat;
2057   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2058   PetscErrorCode ierr;
2059   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2060   PetscScalar    *array;
2061 
2062   PetscFunctionBegin;
2063   *newmat       = 0;
2064   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2065   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2066   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2067   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2068   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2069   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2070 
2071   mat->factortype   = matin->factortype;
2072   mat->preallocated = PETSC_TRUE;
2073   mat->assembled    = PETSC_TRUE;
2074   mat->insertmode   = NOT_SET_VALUES;
2075 
2076   a = (Mat_MPISBAIJ*)mat->data;
2077   a->bs2   = oldmat->bs2;
2078   a->mbs   = oldmat->mbs;
2079   a->nbs   = oldmat->nbs;
2080   a->Mbs   = oldmat->Mbs;
2081   a->Nbs   = oldmat->Nbs;
2082 
2083 
2084   a->size         = oldmat->size;
2085   a->rank         = oldmat->rank;
2086   a->donotstash   = oldmat->donotstash;
2087   a->roworiented  = oldmat->roworiented;
2088   a->rowindices   = 0;
2089   a->rowvalues    = 0;
2090   a->getrowactive = PETSC_FALSE;
2091   a->barray       = 0;
2092   a->rstartbs    = oldmat->rstartbs;
2093   a->rendbs      = oldmat->rendbs;
2094   a->cstartbs    = oldmat->cstartbs;
2095   a->cendbs      = oldmat->cendbs;
2096 
2097   /* hash table stuff */
2098   a->ht           = 0;
2099   a->hd           = 0;
2100   a->ht_size      = 0;
2101   a->ht_flag      = oldmat->ht_flag;
2102   a->ht_fact      = oldmat->ht_fact;
2103   a->ht_total_ct  = 0;
2104   a->ht_insert_ct = 0;
2105 
2106   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2107   if (oldmat->colmap) {
2108 #if defined (PETSC_USE_CTABLE)
2109     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2110 #else
2111     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2112     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2113     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2114 #endif
2115   } else a->colmap = 0;
2116 
2117   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2118     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2119     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2120     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2121   } else a->garray = 0;
2122 
2123   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2124   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2126   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2128 
2129   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2130   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2131   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2133 
2134   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2135   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2136   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2137   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2138   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2139   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2140   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2141   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2147 
2148   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so use the lazy trick of referencing the original scatter: */
2149   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2150   a->sMvctx = oldmat->sMvctx;
2151   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2152 
2153   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2155   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2157   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2158   *newmat = mat;
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatLoad_MPISBAIJ"
2164 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2165 {
2166   PetscErrorCode ierr;
2167   PetscInt       i,nz,j,rstart,rend;
2168   PetscScalar    *vals,*buf;
2169   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2170   MPI_Status     status;
2171   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2172   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2173   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2174   PetscInt       bs=1,Mbs,mbs,extra_rows;
2175   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2176   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2177   int            fd;
2178 
2179   PetscFunctionBegin;
2180   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2181     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2182   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2183 
2184   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2185   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2186   if (!rank) {
2187     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2188     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2189     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2190     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2191   }
2192 
2193   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2194 
2195   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2196   M = header[1]; N = header[2];
2197 
2198   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2199   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2200   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2201 
2202   /* If global sizes are set, check if they are consistent with that given in the file */
2203   if (sizesset) {
2204     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2205   }
2206   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2207   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2208 
2209   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2210 
2211   /*
2212      This code adds extra rows to make sure the number of rows is
2213      divisible by the blocksize
2214   */
2215   Mbs        = M/bs;
2216   extra_rows = bs - M + bs*(Mbs);
2217   if (extra_rows == bs) extra_rows = 0;
2218   else                  Mbs++;
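  /* e.g. M=10, bs=4: Mbs becomes 3 and extra_rows=2, so the matrix is padded to
     12 rows; the padding rows receive a single unit diagonal entry below */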
2219   if (extra_rows && !rank) {
2220     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2221   }
2222 
2223   /* determine ownership of all rows */
2224   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2225     mbs        = Mbs/size + ((Mbs % size) > rank);
2226     m          = mbs*bs;
2227   } else { /* User Set */
2228     m          = newmat->rmap->n;
2229     mbs        = m/bs;
2230   }
2231   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2232   mmbs       = PetscMPIIntCast(mbs);
2233   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2234   rowners[0] = 0;
2235   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2236   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2237   rstart = rowners[rank];
2238   rend   = rowners[rank+1];
2239 
2240   /* distribute row lengths to all processors */
2241   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2242   if (!rank) {
2243     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2244     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2245     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2246     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2247     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2248     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2249     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2250   } else {
2251     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2252   }
2253 
2254   if (!rank) {   /* procs[0] */
2255     /* calculate the number of nonzeros on each processor */
2256     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2257     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2258     for (i=0; i<size; i++) {
2259       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2260         procsnz[i] += rowlengths[j];
2261       }
2262     }
2263     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2264 
2265     /* determine max buffer needed and allocate it */
2266     maxnz = 0;
2267     for (i=0; i<size; i++) {
2268       maxnz = PetscMax(maxnz,procsnz[i]);
2269     }
2270     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2271 
2272     /* read in my part of the matrix column indices  */
2273     nz     = procsnz[0];
2274     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2275     mycols = ibuf;
2276     if (size == 1)  nz -= extra_rows;
2277     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2278     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2279 
2280     /* read in the other processors' column indices (except the last proc's) and ship them off */
2281     for (i=1; i<size-1; i++) {
2282       nz   = procsnz[i];
2283       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2284       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2285     }
2286     /* read in the stuff for the last proc */
2287     if (size != 1) {
2288       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2289       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2290       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2291       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2292     }
2293     ierr = PetscFree(cols);CHKERRQ(ierr);
2294   } else {  /* procs[i], i>0 */
2295     /* determine buffer space needed for message */
2296     nz = 0;
2297     for (i=0; i<m; i++) {
2298       nz += locrowlens[i];
2299     }
2300     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2301     mycols = ibuf;
2302     /* receive message of column indices */
2303     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2304     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2305     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2306   }
2307 
2308   /* loop over local rows, determining number of off diagonal entries */
2309   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2310   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2311   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2312   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2313   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2314   rowcount = 0;
2315   nzcount  = 0;
2316   for (i=0; i<mbs; i++) {
2317     dcount  = 0;
2318     odcount = 0;
2319     for (j=0; j<bs; j++) {
2320       kmax = locrowlens[rowcount];
2321       for (k=0; k<kmax; k++) {
2322         tmp = mycols[nzcount++]/bs; /* block col. index */
2323         if (!mask[tmp]) {
2324           mask[tmp] = 1;
2325           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2326           else masked1[dcount++] = tmp; /* entry in diag portion */
2327         }
2328       }
2329       rowcount++;
2330     }
2331 
2332     dlens[i]  = dcount;  /* d_nzz[i] */
2333     odlens[i] = odcount; /* o_nzz[i] */
2334 
2335     /* zero out the mask elements we set */
2336     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2337     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2338   }
2339   if (!sizesset) {
2340     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2341   }
2342   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2343   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2344 
2345   if (!rank) {
2346     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2347     /* read in my part of the matrix numerical values  */
2348     nz = procsnz[0];
2349     vals = buf;
2350     mycols = ibuf;
2351     if (size == 1)  nz -= extra_rows;
2352     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2353     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2354 
2355     /* insert into matrix */
2356     jj      = rstart*bs;
2357     for (i=0; i<m; i++) {
2358       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       mycols += locrowlens[i];
2360       vals   += locrowlens[i];
2361       jj++;
2362     }
2363 
2364     /* read in other processors (except the last one) and ship out */
2365     for (i=1; i<size-1; i++) {
2366       nz   = procsnz[i];
2367       vals = buf;
2368       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2369       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2370     }
2371     /* the last proc */
2372     if (size != 1){
2373       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2374       vals = buf;
2375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2376       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2377       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2378     }
2379     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2380 
2381   } else {
2382     /* receive numeric values */
2383     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2384 
2385     /* receive message of values */
2386     vals   = buf;
2387     mycols = ibuf;
2388     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2389     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2390     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2391 
2392     /* insert into matrix */
2393     jj      = rstart*bs;
2394     for (i=0; i<m; i++) {
2395       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2396       mycols += locrowlens[i];
2397       vals   += locrowlens[i];
2398       jj++;
2399     }
2400   }
2401 
2402   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2403   ierr = PetscFree(buf);CHKERRQ(ierr);
2404   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2405   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2406   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2407   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2408   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2415 /*XXXXX@
2416    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2417 
2418    Input Parameters:
2419 .  mat  - the matrix
2420 .  fact - factor
2421 
2422    Not Collective on Mat, each process can have a different hash factor
2423 
2424    Level: advanced
2425 
2426   Notes:
2427    This can also be set by the command line option: -mat_use_hash_table fact
2428 
2429 .keywords: matrix, hashtable, factor, HT
2430 
2431 .seealso: MatSetOption()
2432 @XXXXX*/
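/*
   For example (with a hypothetical executable name), the hash-table assembly
   path and its sizing factor can be requested at run time with

     mpiexec -n 4 ./myapp -mat_type mpisbaij -mat_use_hash_table 1.39
*/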
2433 
2434 
2435 #undef __FUNCT__
2436 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2437 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2438 {
2439   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2440   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2441   PetscReal      atmp;
2442   PetscReal      *work,*svalues,*rvalues;
2443   PetscErrorCode ierr;
2444   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2445   PetscMPIInt    rank,size;
2446   PetscInt       *rowners_bs,dest,count,source;
2447   PetscScalar    *va;
2448   MatScalar      *ba;
2449   MPI_Status     stat;
2450 
2451   PetscFunctionBegin;
2452   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2453   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2454   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2455 
2456   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2457   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2458 
2459   bs   = A->rmap->bs;
2460   mbs  = a->mbs;
2461   Mbs  = a->Mbs;
2462   ba   = b->a;
2463   bi   = b->i;
2464   bj   = b->j;
2465 
2466   /* find ownerships */
2467   rowners_bs = A->rmap->range;
2468 
2469   /* each proc creates an array to be distributed */
2470   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2471   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2472 
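  /* Because only the upper triangle is stored, an entry of B in local row r and
     global column c also stands for the (c,r) entry owned by a higher-ranked
     process; its magnitude is accumulated in work[] and shipped to that owner. */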
2473   /* row_max for B */
2474   if (rank != size-1){
2475     for (i=0; i<mbs; i++) {
2476       ncols = bi[1] - bi[0]; bi++;
2477       brow  = bs*i;
2478       for (j=0; j<ncols; j++){
2479         bcol = bs*(*bj);
2480         for (kcol=0; kcol<bs; kcol++){
2481           col = bcol + kcol;                 /* local col index */
2482           col += rowners_bs[rank+1];      /* global col index */
2483           for (krow=0; krow<bs; krow++){
2484             atmp = PetscAbsScalar(*ba); ba++;
2485             row = brow + krow;    /* local row index */
2486             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2487             if (work[col] < atmp) work[col] = atmp;
2488           }
2489         }
2490         bj++;
2491       }
2492     }
2493 
2494     /* send values to its owners */
2495     for (dest=rank+1; dest<size; dest++){
2496       svalues = work + rowners_bs[dest];
2497       count   = rowners_bs[dest+1]-rowners_bs[dest];
2498       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2499     }
2500   }
2501 
2502   /* receive values */
2503   if (rank){
2504     rvalues = work;
2505     count   = rowners_bs[rank+1]-rowners_bs[rank];
2506     for (source=0; source<rank; source++){
2507       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2508       /* process values */
2509       for (i=0; i<count; i++){
2510         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2511       }
2512     }
2513   }
2514 
2515   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2516   ierr = PetscFree(work);CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatSOR_MPISBAIJ"
2522 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2523 {
2524   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2525   PetscErrorCode    ierr;
2526   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2527   PetscScalar       *x,*ptr,*from;
2528   Vec               bb1;
2529   const PetscScalar *b;
2530 
2531   PetscFunctionBegin;
2532   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2533   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2534 
2535   if (flag == SOR_APPLY_UPPER) {
2536     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2537     PetscFunctionReturn(0);
2538   }
2539 
2540   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2541     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2542       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2543       its--;
2544     }
2545 
2546     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
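    /* Each sweep: slvec0 is packed with [-xx; -B^T*xx]; one forward scatter on
       sMvctx adds the B^T pieces into the owners' slvec1a (preloaded with bb)
       and gathers the needed ghost values of xx into slvec1b; MatMultAdd with B
       then yields bb1 = bb minus all off-diagonal couplings, and a local
       symmetric sweep with the diagonal block A updates xx. */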
2547     while (its--){
2548 
2549       /* lower triangular part: slvec0b = - B^T*xx */
2550       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2551 
2552       /* copy xx into slvec0a */
2553       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2554       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2555       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2556       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2557 
2558       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2559 
2560       /* copy bb into slvec1a */
2561       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2562       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2563       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2564       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2565 
2566       /* set slvec1b = 0 */
2567       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2568 
2569       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2570       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2571       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2572       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2573 
2574       /* upper triangular part: bb1 = bb1 - B*x */
2575       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2576 
2577       /* local diagonal sweep */
2578       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2579     }
2580     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2581   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2582     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2583   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2584     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2585   } else if (flag & SOR_EISENSTAT) {
2586     Vec               xx1;
2587     PetscTruth        hasop;
2588     const PetscScalar *diag;
2589     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2590     PetscInt          i,n;
2591 
2592     if (!mat->xx1) {
2593       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2594       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2595     }
2596     xx1 = mat->xx1;
2597     bb1 = mat->bb1;
2598 
2599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2600 
2601     if (!mat->diag) {
2602       /* this is wrong for same matrix with new nonzero values */
2603       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2604       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2605     }
2606     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2607 
2608     if (hasop) {
2609       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2610       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2611     } else {
2612       /*
2613           These two lines are replaced by code that may be a bit faster for a good compiler
2614       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2615       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2616       */
2617       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2618       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2619       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2620       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2621       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2622       if (omega == 1.0) {
2623         for (i=0; i<n; i++) {
2624           sl[i] = b[i] - diag[i]*x[i];
2625         }
2626         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2627       } else {
2628         for (i=0; i<n; i++) {
2629           sl[i] = b[i] + scale*diag[i]*x[i];
2630         }
2631         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2632       }
2633       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2634       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2635       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2636       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2637     }
2638 
2639     /* multiply off-diagonal portion of matrix */
2640     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2641     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2642     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2643     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2644     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2645     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2646     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2647     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2648     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2649     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2650 
2651     /* local sweep */
2652     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2653     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2654   } else {
2655     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2656   }
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2662 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2663 {
2664   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2665   PetscErrorCode ierr;
2666   Vec            lvec1,bb1;
2667 
2668   PetscFunctionBegin;
2669   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2670   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2671 
2672   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2673     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2674       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2675       its--;
2676     }
2677 
2678     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2679     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2680     while (its--){
2681       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2682 
2683       /* lower triangular part: bb1 = bb - B^T*xx */
2684       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2685       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2686 
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2689       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2690 
2691       /* upper triangular part: bb1 = bb1 - B*x */
2692       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2693       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2694 
2695       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2696 
2697       /* diagonal sweep */
2698       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2699     }
2700     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2701     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2702   } else {
2703     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2704   }
2705   PetscFunctionReturn(0);
2706 }
2707 
2708