/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f3fe499b4cc4d64bf04aa4f5e4963dcc4eb56541) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
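
/*
   Caller-side sketch for the pair above (illustrative only; the block is not
   compiled): MatStoreValues()/MatRetrieveValues() snapshot and restore the
   numerical values without touching the nonzero structure, which requires
   that no new nonzero locations be created in between.
*/
#if 0
ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* snapshot the current values  */
/* ... overwrite values, e.g. while experimenting with a shifted matrix ... */
ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restore the snapshot         */
#endif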

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
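
/*
   Note on the addressing used by the two macros above: each bs-by-bs block is
   stored as a contiguous column-major slab of bs2 = bs*bs scalars, so entry
   (ridx,cidx) of the _i-th block in a block row lives at
   ap[bs2*_i + bs*cidx + ridx]. A standalone sketch of that arithmetic with
   made-up sizes (illustrative only; the block is not compiled):
*/
#if 0
{
  PetscInt  bs = 2,bs2 = 4;            /* 2x2 blocks, 4 scalars per block  */
  MatScalar ap[8];                     /* two consecutive blocks           */
  PetscInt  _i = 1,ridx = 0,cidx = 1;  /* entry (0,1) of the second block  */
  ap[bs2*_i + bs*cidx + ridx] = 3.14;  /* writes ap[4 + 2 + 0] = ap[6]     */
}
#endif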

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
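
/*
   Caller-side sketch for the routine above (hypothetical example, not part of
   this file): with MATMPISBAIJ only the upper triangle is stored, so a user
   either supplies only entries with column >= row or asks the matrix to
   silently drop the rest.
*/
#if 0
ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
row = 0; cols[0] = 0; cols[1] = 3; vals[0] = 4.0; vals[1] = -1.0;
ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif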

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If the input block is already contiguous in v (a single block column
           in row-oriented input, or a single block row in column-oriented
           input), no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
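
/*
   Note on the NORM_FROBENIUS branch above: sum[0] accumulates ||A||_F^2 over
   the processes and sum[1] accumulates ||B||_F^2. Every block stored in B
   also stands for its (unstored) transpose in the lower triangle of the
   global symmetric matrix, so it contributes twice, giving
       ||M||_F = sqrt( sum_p ||A_p||_F^2 + 2*sum_p ||B_p||_F^2 ).
*/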

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES mode or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
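
/*
   Why the MPI_BOR reduction above detects mixed modes (assuming the usual
   PETSc encoding in which INSERT_VALUES and ADD_VALUES are distinct bits):
   if every process used the same mode the OR reproduces that mode, while a
   mix ORs to ADD_VALUES|INSERT_VALUES, which the error test catches.
*/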

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the roworiented flags, and restore the original flags after
       MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* set up Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
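
/*
   Sketch of the single-scatter scheme used by the two MatMult routines above,
   as read from the code: with the global symmetric matrix viewed as
   [A B; B^T ...] and only the upper triangle stored,
     slvec1a = A*x_loc            local diagonal block
     slvec0b = B^T*x_loc          contributions owed to other processes
     slvec0a = x_loc              copied so one scatter moves both pieces
     slvec0 -> slvec1 (ADD)       ships B^T*x_loc out, brings ghost x in
     y = B*slvec1b + slvec1a      superdiagonal part plus accumulated sums
*/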

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
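
/*
   Caller-side sketch (hypothetical, not part of this file; variable
   declarations omitted): for SBAIJ matrices the row routines return only the
   stored upper triangle and must be bracketed by
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular().
*/
#if 0
ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
/* ... examine the nz entries in cols[] and vals[] ... */
ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
#endif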

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
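
/*
   Note on the MAT_HERMITIAN case above: besides forwarding the flag to the
   diagonal block, it swaps the MatMult kernel so the off-process contribution
   is applied with a conjugate transpose (MatMult_MPISBAIJ_Hermitian) rather
   than a plain transpose.
*/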

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
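
/*
   Why ll must equal rr above: the SBAIJ format stores only the upper triangle
   of a symmetric matrix, and L*A*R remains symmetric exactly when L == R,
   i.e. the only scaling this format can represent is D*A*D.
*/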

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatCopy_MPISBAIJ"
1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1345 {
1346   PetscErrorCode ierr;
1347   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1348   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1349 
1350   PetscFunctionBegin;
1351   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1352   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1353     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1354     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1355     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1356   } else {
1357     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1358     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1359   }
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1366 {
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1377 {
1378   PetscErrorCode ierr;
1379   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1380   PetscBLASInt   bnz,one=1;
1381   Mat_SeqSBAIJ   *xa,*ya;
1382   Mat_SeqBAIJ    *xb,*yb;
1383 
1384   PetscFunctionBegin;
1385   if (str == SAME_NONZERO_PATTERN) {
1386     PetscScalar alpha = a;
1387     xa = (Mat_SeqSBAIJ *)xx->A->data;
1388     ya = (Mat_SeqSBAIJ *)yy->A->data;
1389     bnz = PetscBLASIntCast(xa->nz);
1390     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1391     xb = (Mat_SeqBAIJ *)xx->B->data;
1392     yb = (Mat_SeqBAIJ *)yy->B->data;
1393     bnz = PetscBLASIntCast(xb->nz);
1394     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1395   } else {
1396     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1397     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1398     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1406 {
1407   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1408   PetscInt        rbs,cbs;
1409   PetscErrorCode  ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1413   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1414   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with SBAIJ row block size %d",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with SBAIJ column block size %d",bs,cbs);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1424 {
1425   PetscErrorCode ierr;
1426   PetscInt       i;
1427   PetscTruth     flg;
1428 
1429   PetscFunctionBegin;
1430   for (i=0; i<n; i++) {
1431     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1432     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1433   }
1434   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 
1439 /* -------------------------------------------------------------------*/
1440 static struct _MatOps MatOps_Values = {
1441        MatSetValues_MPISBAIJ,
1442        MatGetRow_MPISBAIJ,
1443        MatRestoreRow_MPISBAIJ,
1444        MatMult_MPISBAIJ,
1445 /* 4*/ MatMultAdd_MPISBAIJ,
       MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones since the matrix is symmetric */
1447        MatMultAdd_MPISBAIJ,
1448        0,
1449        0,
1450        0,
1451 /*10*/ 0,
1452        0,
1453        0,
1454        MatSOR_MPISBAIJ,
1455        MatTranspose_MPISBAIJ,
1456 /*15*/ MatGetInfo_MPISBAIJ,
1457        MatEqual_MPISBAIJ,
1458        MatGetDiagonal_MPISBAIJ,
1459        MatDiagonalScale_MPISBAIJ,
1460        MatNorm_MPISBAIJ,
1461 /*20*/ MatAssemblyBegin_MPISBAIJ,
1462        MatAssemblyEnd_MPISBAIJ,
1463        MatSetOption_MPISBAIJ,
1464        MatZeroEntries_MPISBAIJ,
1465 /*24*/ 0,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1471        0,
1472        0,
1473        0,
1474        0,
1475 /*34*/ MatDuplicate_MPISBAIJ,
1476        0,
1477        0,
1478        0,
1479        0,
1480 /*39*/ MatAXPY_MPISBAIJ,
1481        MatGetSubMatrices_MPISBAIJ,
1482        MatIncreaseOverlap_MPISBAIJ,
1483        MatGetValues_MPISBAIJ,
1484        MatCopy_MPISBAIJ,
1485 /*44*/ 0,
1486        MatScale_MPISBAIJ,
1487        0,
1488        0,
1489        0,
1490 /*49*/ MatSetBlockSize_MPISBAIJ,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*54*/ 0,
1496        0,
1497        MatSetUnfactored_MPISBAIJ,
1498        0,
1499        MatSetValuesBlocked_MPISBAIJ,
1500 /*59*/ 0,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*64*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*74*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*79*/ 0,
1521        0,
1522        0,
1523        0,
1524        MatLoad_MPISBAIJ,
1525 /*84*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*89*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*94*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*99*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*104*/0,
1546        MatRealPart_MPISBAIJ,
1547        MatImaginaryPart_MPISBAIJ,
1548        MatGetRowUpperTriangular_MPISBAIJ,
1549        MatRestoreRowUpperTriangular_MPISBAIJ,
1550 /*109*/0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*114*/0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*119*/0,
1561        0,
1562        0,
1563        0
1564 };
1565 
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1570 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1571 {
1572   PetscFunctionBegin;
1573   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1574   *iscopy = PETSC_FALSE;
1575   PetscFunctionReturn(0);
1576 }
1577 EXTERN_C_END
1578 
1579 EXTERN_C_BEGIN
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1582 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1583 {
1584   Mat_MPISBAIJ   *b;
1585   PetscErrorCode ierr;
1586   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1587 
1588   PetscFunctionBegin;
1589   if (bs < 0){
1590     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1591       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1592     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1593     bs   = PetscAbs(bs);
1594   }
1595   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1596   bs = newbs;
1597 
1598   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1599   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1600   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1601   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1602 
1603   B->rmap->bs = B->cmap->bs = bs;
1604   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1605   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1606 
1607   if (d_nnz) {
1608     for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1610     }
1611   }
1612   if (o_nnz) {
1613     for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1615     }
1616   }
1617 
1618   b   = (Mat_MPISBAIJ*)B->data;
1619   mbs = B->rmap->n/bs;
1620   Mbs = B->rmap->N/bs;
  if (mbs*bs != B->rmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
  }
1624 
1625   B->rmap->bs  = bs;
1626   b->bs2 = bs*bs;
1627   b->mbs = mbs;
1628   b->nbs = mbs;
1629   b->Mbs = Mbs;
1630   b->Nbs = Mbs;
1631 
1632   for (i=0; i<=b->size; i++) {
1633     b->rangebs[i] = B->rmap->range[i]/bs;
1634   }
1635   b->rstartbs = B->rmap->rstart/bs;
1636   b->rendbs   = B->rmap->rend/bs;
1637 
1638   b->cstartbs = B->cmap->rstart/bs;
1639   b->cendbs   = B->cmap->rend/bs;
1640 
1641   if (!B->preallocated) {
1642     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1643     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1644     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1645     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1646     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1647     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1648     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1649     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1650     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1651   }
1652 
1653   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1654   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1655   B->preallocated = PETSC_TRUE;
1656   PetscFunctionReturn(0);
1657 }
1658 EXTERN_C_END
1659 
1660 EXTERN_C_BEGIN
1661 #if defined(PETSC_HAVE_MUMPS)
1662 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1663 #endif
1664 #if defined(PETSC_HAVE_SPOOLES)
1665 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1666 #endif
1667 #if defined(PETSC_HAVE_PASTIX)
1668 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1669 #endif
1670 EXTERN_C_END
1671 
1672 /*MC
1673    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1674    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1675 
1676    Options Database Keys:
1677 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1678 
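   Example:
   A minimal creation sketch (the sizes and preallocation values here are illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
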
1679   Level: beginner
1680 
1681 .seealso: MatCreateMPISBAIJ
1682 M*/
1683 
1684 EXTERN_C_BEGIN
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatCreate_MPISBAIJ"
1687 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1688 {
1689   Mat_MPISBAIJ   *b;
1690   PetscErrorCode ierr;
1691   PetscTruth     flg;
1692 
1693   PetscFunctionBegin;
1694 
1695   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1696   B->data = (void*)b;
1697   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1698 
1699   B->ops->destroy    = MatDestroy_MPISBAIJ;
1700   B->ops->view       = MatView_MPISBAIJ;
1701   B->mapping         = 0;
1702   B->assembled       = PETSC_FALSE;
1703 
1704   B->insertmode = NOT_SET_VALUES;
1705   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1706   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1707 
1708   /* build local table of row and column ownerships */
1709   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1710 
1711   /* build cache for off array entries formed */
1712   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1713   b->donotstash  = PETSC_FALSE;
1714   b->colmap      = PETSC_NULL;
1715   b->garray      = PETSC_NULL;
1716   b->roworiented = PETSC_TRUE;
1717 
1718   /* stuff used in block assembly */
1719   b->barray       = 0;
1720 
1721   /* stuff used for matrix vector multiply */
1722   b->lvec         = 0;
1723   b->Mvctx        = 0;
1724   b->slvec0       = 0;
1725   b->slvec0b      = 0;
1726   b->slvec1       = 0;
1727   b->slvec1a      = 0;
1728   b->slvec1b      = 0;
1729   b->sMvctx       = 0;
1730 
1731   /* stuff for MatGetRow() */
1732   b->rowindices   = 0;
1733   b->rowvalues    = 0;
1734   b->getrowactive = PETSC_FALSE;
1735 
1736   /* hash table stuff */
1737   b->ht           = 0;
1738   b->hd           = 0;
1739   b->ht_size      = 0;
1740   b->ht_flag      = PETSC_FALSE;
1741   b->ht_fact      = 0;
1742   b->ht_total_ct  = 0;
1743   b->ht_insert_ct = 0;
1744 
1745   b->in_loc       = 0;
1746   b->v_loc        = 0;
1747   b->n_loc        = 0;
1748   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1749     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1750     if (flg) {
1751       PetscReal fact = 1.39;
1752       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1753       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1754       if (fact <= 1.0) fact = 1.39;
1755       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1756       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1757     }
1758   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1759 
1760 #if defined(PETSC_HAVE_PASTIX)
1761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1762 					   "MatGetFactor_mpisbaij_pastix",
1763 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1764 #endif
1765 #if defined(PETSC_HAVE_MUMPS)
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1767                                      "MatGetFactor_sbaij_mumps",
1768                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1769 #endif
1770 #if defined(PETSC_HAVE_SPOOLES)
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1772                                      "MatGetFactor_mpisbaij_spooles",
1773                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1774 #endif
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1776                                      "MatStoreValues_MPISBAIJ",
1777                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1779                                      "MatRetrieveValues_MPISBAIJ",
1780                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1782                                      "MatGetDiagonalBlock_MPISBAIJ",
1783                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1785                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1786                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1787   B->symmetric                  = PETSC_TRUE;
1788   B->structurally_symmetric     = PETSC_TRUE;
1789   B->symmetric_set              = PETSC_TRUE;
1790   B->structurally_symmetric_set = PETSC_TRUE;
1791   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 EXTERN_C_END
1795 
1796 /*MC
1797    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1798 
1799    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1800    and MATMPISBAIJ otherwise.
1801 
1802    Options Database Keys:
1803 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1804 
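   Example:
   A sketch of selecting this type at runtime (sizes are illustrative):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);   /* combined with the runtime option -mat_type sbaij */
.ve
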
1805   Level: beginner
1806 
1807 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1808 M*/
1809 
1810 EXTERN_C_BEGIN
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatCreate_SBAIJ"
1813 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1814 {
1815   PetscErrorCode ierr;
1816   PetscMPIInt    size;
1817 
1818   PetscFunctionBegin;
1819   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1820   if (size == 1) {
1821     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1822   } else {
1823     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1824   }
1825   PetscFunctionReturn(0);
1826 }
1827 EXTERN_C_END
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1831 /*@C
1832    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1833    the user should preallocate the matrix storage by setting the parameters
1834    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1835    performance can be increased by more than a factor of 50.
1836 
1837    Collective on Mat
1838 
1839    Input Parameters:
1840 +  A - the matrix
.  bs   - size of block
1842 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1843            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1848 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1849            submatrix (same for all local rows).
1850 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1851            off-diagonal portion of the local submatrix (possibly different for
1852            each block row) or PETSC_NULL.
1853 
1854 
1855    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1859 
1860    Notes:
1861 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1864 
1865    If the *_nnz parameter is given then the *_nz parameter is ignored
1866 
1867    Storage Information:
1868    For a square global matrix we define each processor's diagonal portion
1869    to be its local rows and the corresponding columns (a square submatrix);
1870    each processor's off-diagonal portion encompasses the remainder of the
1871    local matrix (a rectangular submatrix).
1872 
1873    The user can specify preallocated storage for the diagonal part of
1874    the local submatrix with either d_nz or d_nnz (not both).  Set
1875    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1876    memory allocation.  Likewise, specify preallocated storage for the
1877    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1878 
   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1881    You can also run with the option -info and look for messages with the string
1882    malloc in them to see if additional memory allocation was needed.
1883 
1884    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1885    the figure below we depict these three local rows and all columns (0-11).
1886 
1887 .vb
1888            0 1 2 3 4 5 6 7 8 9 10 11
1889           -------------------
1890    row 3  |  o o o d d d o o o o o o
1891    row 4  |  o o o d d d o o o o o o
1892    row 5  |  o o o d d d o o o o o o
1893           -------------------
1894 .ve
1895 
1896    Thus, any entries in the d locations are stored in the d (diagonal)
1897    submatrix, and any entries in the o locations are stored in the
1898    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1899    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1900 
1901    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1902    plus the diagonal part of the d matrix,
1903    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1904    In general, for PDE problems in which most nonzeros are near the diagonal,
1905    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1906    or you will get TERRIBLE performance; see the users' manual chapter on
1907    matrices.
1908 
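   As a worked example for the figure above (a sketch assuming bs=1 and that every
   depicted d and o location is nonzero; entries below the diagonal, here columns
   0-2, are not stored for this symmetric format), the process owning rows 3-5
   could preallocate with:
.vb
   PetscInt d_nnz[] = {3,2,1};  /* row 3: cols 3,4,5; row 4: cols 4,5; row 5: col 5 */
   PetscInt o_nnz[] = {6,6,6};  /* columns 6-11 for each of the three rows */
   MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve
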
1909    Level: intermediate
1910 
1911 .keywords: matrix, block, aij, compressed row, sparse, parallel
1912 
1913 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1914 @*/
1915 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1916 {
1917   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1918 
1919   PetscFunctionBegin;
1920   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1921   if (f) {
1922     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatCreateMPISBAIJ"
1929 /*@C
1930    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1931    (block compressed row).  For good matrix assembly performance
1932    the user should preallocate the matrix storage by setting the parameters
1933    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1934    performance can be increased by more than a factor of 50.
1935 
1936    Collective on MPI_Comm
1937 
1938    Input Parameters:
1939 +  comm - MPI communicator
.  bs   - size of block
1941 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1942            This value should be the same as the local size used in creating the
1943            y vector for the matrix-vector product y = Ax.
1944 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1945            This value should be the same as the local size used in creating the
1946            x vector for the matrix-vector product y = Ax.
1947 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1948 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1949 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1950            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1955 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1956            submatrix (same for all local rows).
1957 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1958            off-diagonal portion of the local submatrix (possibly different for
1959            each block row) or PETSC_NULL.
1960 
1961    Output Parameter:
1962 .  A - the matrix
1963 
1964    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)
1970 
1971    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1973    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1974 
1975    Notes:
1976    The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.
1978 
1979    The user MUST specify either the local or global matrix dimensions
1980    (possibly both).
1981 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1984 
1985    If the *_nnz parameter is given then the *_nz parameter is ignored
1986 
1987    Storage Information:
1988    For a square global matrix we define each processor's diagonal portion
1989    to be its local rows and the corresponding columns (a square submatrix);
1990    each processor's off-diagonal portion encompasses the remainder of the
1991    local matrix (a rectangular submatrix).
1992 
1993    The user can specify preallocated storage for the diagonal part of
1994    the local submatrix with either d_nz or d_nnz (not both).  Set
1995    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1996    memory allocation.  Likewise, specify preallocated storage for the
1997    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1998 
1999    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2000    the figure below we depict these three local rows and all columns (0-11).
2001 
2002 .vb
2003            0 1 2 3 4 5 6 7 8 9 10 11
2004           -------------------
2005    row 3  |  o o o d d d o o o o o o
2006    row 4  |  o o o d d d o o o o o o
2007    row 5  |  o o o d d d o o o o o o
2008           -------------------
2009 .ve
2010 
2011    Thus, any entries in the d locations are stored in the d (diagonal)
2012    submatrix, and any entries in the o locations are stored in the
2013    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2014    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2015 
2016    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2017    plus the diagonal part of the d matrix,
2018    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2019    In general, for PDE problems in which most nonzeros are near the diagonal,
2020    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2021    or you will get TERRIBLE performance; see the users' manual chapter on
2022    matrices.
2023 
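   Example call (a sketch; the preallocation values are illustrative):
.vb
   Mat A;
   MatCreateMPISBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve
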
2024    Level: intermediate
2025 
2026 .keywords: matrix, block, aij, compressed row, sparse, parallel
2027 
2028 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2029 @*/
2030 
2031 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2032 {
2033   PetscErrorCode ierr;
2034   PetscMPIInt    size;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2038   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2039   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2040   if (size > 1) {
2041     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2042     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2043   } else {
2044     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2045     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2046   }
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2053 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2054 {
2055   Mat            mat;
2056   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2057   PetscErrorCode ierr;
2058   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2059   PetscScalar    *array;
2060 
2061   PetscFunctionBegin;
2062   *newmat       = 0;
2063   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2064   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2065   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2066   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2067   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2068   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2069 
2070   mat->factortype   = matin->factortype;
2071   mat->preallocated = PETSC_TRUE;
2072   mat->assembled    = PETSC_TRUE;
2073   mat->insertmode   = NOT_SET_VALUES;
2074 
2075   a = (Mat_MPISBAIJ*)mat->data;
2076   a->bs2   = oldmat->bs2;
2077   a->mbs   = oldmat->mbs;
2078   a->nbs   = oldmat->nbs;
2079   a->Mbs   = oldmat->Mbs;
2080   a->Nbs   = oldmat->Nbs;
2081 
2082 
2083   a->size         = oldmat->size;
2084   a->rank         = oldmat->rank;
2085   a->donotstash   = oldmat->donotstash;
2086   a->roworiented  = oldmat->roworiented;
2087   a->rowindices   = 0;
2088   a->rowvalues    = 0;
2089   a->getrowactive = PETSC_FALSE;
2090   a->barray       = 0;
2091   a->rstartbs    = oldmat->rstartbs;
2092   a->rendbs      = oldmat->rendbs;
2093   a->cstartbs    = oldmat->cstartbs;
2094   a->cendbs      = oldmat->cendbs;
2095 
2096   /* hash table stuff */
2097   a->ht           = 0;
2098   a->hd           = 0;
2099   a->ht_size      = 0;
2100   a->ht_flag      = oldmat->ht_flag;
2101   a->ht_fact      = oldmat->ht_fact;
2102   a->ht_total_ct  = 0;
2103   a->ht_insert_ct = 0;
2104 
2105   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2106   if (oldmat->colmap) {
2107 #if defined (PETSC_USE_CTABLE)
2108     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2109 #else
2110     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2111     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2112     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2113 #endif
2114   } else a->colmap = 0;
2115 
2116   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2117     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2118     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2119     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2120   } else a->garray = 0;
2121 
2122   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2123   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2124   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2125   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2127 
2128   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2130   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2132 
2133   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2134   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2135   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2136   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2137   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2138   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2139   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2140   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2141   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2146 
2147   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2148   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2149   a->sMvctx = oldmat->sMvctx;
2150   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2151 
2152   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2153   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2154   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2156   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2157   *newmat = mat;
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatLoad_MPISBAIJ"
2163 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2164 {
2165   PetscErrorCode ierr;
2166   PetscInt       i,nz,j,rstart,rend;
2167   PetscScalar    *vals,*buf;
2168   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2169   MPI_Status     status;
2170   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2171   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2172   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2173   PetscInt       bs=1,Mbs,mbs,extra_rows;
2174   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2175   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2176   int            fd;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2180     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2181   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2182 
2183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2184   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2185   if (!rank) {
2186     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2187     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2188     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2189     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2190   }
2191 
2192   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2193 
2194   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2195   M = header[1]; N = header[2];
2196 
2197   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2198   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2199   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2200 
2201   /* If global sizes are set, check if they are consistent with that given in the file */
2202   if (sizesset) {
2203     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2204   }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2207 
2208   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2209 
2210   /*
2211      This code adds extra rows to make sure the number of rows is
2212      divisible by the blocksize
2213   */
2214   Mbs        = M/bs;
2215   extra_rows = bs - M + bs*(Mbs);
2216   if (extra_rows == bs) extra_rows = 0;
2217   else                  Mbs++;
  if (extra_rows && !rank) {
2219     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2220   }
2221 
2222   /* determine ownership of all rows */
2223   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2224     mbs        = Mbs/size + ((Mbs % size) > rank);
2225     m          = mbs*bs;
2226   } else { /* User Set */
2227     m          = newmat->rmap->n;
2228     mbs        = m/bs;
2229   }
2230   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2231   mmbs       = PetscMPIIntCast(mbs);
2232   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2233   rowners[0] = 0;
2234   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2235   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2236   rstart = rowners[rank];
2237   rend   = rowners[rank+1];
2238 
2239   /* distribute row lengths to all processors */
2240   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2241   if (!rank) {
2242     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2243     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2244     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2245     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2246     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2247     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2248     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2249   } else {
2250     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2251   }
2252 
2253   if (!rank) {   /* procs[0] */
2254     /* calculate the number of nonzeros on each processor */
2255     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2256     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2257     for (i=0; i<size; i++) {
2258       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2259         procsnz[i] += rowlengths[j];
2260       }
2261     }
2262     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2263 
2264     /* determine max buffer needed and allocate it */
2265     maxnz = 0;
2266     for (i=0; i<size; i++) {
2267       maxnz = PetscMax(maxnz,procsnz[i]);
2268     }
2269     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2270 
2271     /* read in my part of the matrix column indices  */
2272     nz     = procsnz[0];
2273     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2274     mycols = ibuf;
2275     if (size == 1)  nz -= extra_rows;
2276     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2277     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2278 
    /* read in everyone else's portion (except the last proc's) and ship it off */
2280     for (i=1; i<size-1; i++) {
2281       nz   = procsnz[i];
2282       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2283       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2284     }
2285     /* read in the stuff for the last proc */
2286     if (size != 1) {
2287       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2288       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2289       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2290       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2291     }
2292     ierr = PetscFree(cols);CHKERRQ(ierr);
2293   } else {  /* procs[i], i>0 */
2294     /* determine buffer space needed for message */
2295     nz = 0;
2296     for (i=0; i<m; i++) {
2297       nz += locrowlens[i];
2298     }
2299     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2300     mycols = ibuf;
2301     /* receive message of column indices*/
2302     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2303     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2305   }
2306 
2307   /* loop over local rows, determining number of off diagonal entries */
2308   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2309   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2310   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2311   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2312   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2313   rowcount = 0;
2314   nzcount  = 0;
2315   for (i=0; i<mbs; i++) {
2316     dcount  = 0;
2317     odcount = 0;
2318     for (j=0; j<bs; j++) {
2319       kmax = locrowlens[rowcount];
2320       for (k=0; k<kmax; k++) {
2321         tmp = mycols[nzcount++]/bs; /* block col. index */
2322         if (!mask[tmp]) {
2323           mask[tmp] = 1;
2324           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2325           else masked1[dcount++] = tmp; /* entry in diag portion */
2326         }
2327       }
2328       rowcount++;
2329     }
2330 
2331     dlens[i]  = dcount;  /* d_nzz[i] */
2332     odlens[i] = odcount; /* o_nzz[i] */
2333 
2334     /* zero out the mask elements we set */
2335     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2336     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2337   }
  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
2341   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2342   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2343 
2344   if (!rank) {
2345     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2346     /* read in my part of the matrix numerical values  */
2347     nz = procsnz[0];
2348     vals = buf;
2349     mycols = ibuf;
2350     if (size == 1)  nz -= extra_rows;
2351     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2352     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2353 
2354     /* insert into matrix */
2355     jj      = rstart*bs;
2356     for (i=0; i<m; i++) {
2357       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2358       mycols += locrowlens[i];
2359       vals   += locrowlens[i];
2360       jj++;
2361     }
2362 
2363     /* read in other processors (except the last one) and ship out */
2364     for (i=1; i<size-1; i++) {
2365       nz   = procsnz[i];
2366       vals = buf;
2367       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2368       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2369     }
2370     /* the last proc */
2371     if (size != 1){
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2373       vals = buf;
2374       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2375       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2376       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2377     }
2378     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2379 
2380   } else {
2381     /* receive numeric values */
2382     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2383 
2384     /* receive message of values*/
2385     vals   = buf;
2386     mycols = ibuf;
2387     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2388     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2390 
2391     /* insert into matrix */
2392     jj      = rstart*bs;
2393     for (i=0; i<m; i++) {
2394       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2395       mycols += locrowlens[i];
2396       vals   += locrowlens[i];
2397       jj++;
2398     }
2399   }
2400 
2401   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2402   ierr = PetscFree(buf);CHKERRQ(ierr);
2403   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2404   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2405   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2406   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2407   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2408   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2409   PetscFunctionReturn(0);
2410 }
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2414 /*XXXXX@
2415    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2416 
2417    Input Parameters:
2418 .  mat  - the matrix
2419 .  fact - factor
2420 
2421    Not Collective on Mat, each process can have a different hash factor
2422 
2423    Level: advanced
2424 
2425   Notes:
2426    This can also be set by the command line option: -mat_use_hash_table fact
2427 
2428 .keywords: matrix, hashtable, factor, HT
2429 
2430 .seealso: MatSetOption()
2431 @XXXXX*/
2432 
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2436 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2437 {
2438   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2439   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2440   PetscReal      atmp;
2441   PetscReal      *work,*svalues,*rvalues;
2442   PetscErrorCode ierr;
2443   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2444   PetscMPIInt    rank,size;
2445   PetscInt       *rowners_bs,dest,count,source;
2446   PetscScalar    *va;
2447   MatScalar      *ba;
2448   MPI_Status     stat;
2449 
2450   PetscFunctionBegin;
2451   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2452   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2453   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2454 
2455   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2456   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2457 
2458   bs   = A->rmap->bs;
2459   mbs  = a->mbs;
2460   Mbs  = a->Mbs;
2461   ba   = b->a;
2462   bi   = b->i;
2463   bj   = b->j;
2464 
2465   /* find ownerships */
2466   rowners_bs = A->rmap->range;
2467 
2468   /* each proc creates an array to be distributed */
2469   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2470   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2471 
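  /* Only the upper triangle is stored, so each entry of B also contributes to the
     row maximum of the (transposed) row owned by a later process.  Those column
     maxima are accumulated in work[] below, then each slice of work[] is shipped
     to the process that owns the corresponding rows. */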
2472   /* row_max for B */
2473   if (rank != size-1){
2474     for (i=0; i<mbs; i++) {
2475       ncols = bi[1] - bi[0]; bi++;
2476       brow  = bs*i;
2477       for (j=0; j<ncols; j++){
2478         bcol = bs*(*bj);
2479         for (kcol=0; kcol<bs; kcol++){
2480           col = bcol + kcol;                 /* local col index */
2481           col += rowners_bs[rank+1];      /* global col index */
2482           for (krow=0; krow<bs; krow++){
2483             atmp = PetscAbsScalar(*ba); ba++;
2484             row = brow + krow;    /* local row index */
2485             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2486             if (work[col] < atmp) work[col] = atmp;
2487           }
2488         }
2489         bj++;
2490       }
2491     }
2492 
2493     /* send values to its owners */
2494     for (dest=rank+1; dest<size; dest++){
2495       svalues = work + rowners_bs[dest];
2496       count   = rowners_bs[dest+1]-rowners_bs[dest];
2497       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2498     }
2499   }
2500 
2501   /* receive values */
2502   if (rank){
2503     rvalues = work;
2504     count   = rowners_bs[rank+1]-rowners_bs[rank];
2505     for (source=0; source<rank; source++){
2506       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2507       /* process values */
2508       for (i=0; i<count; i++){
2509         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2510       }
2511     }
2512   }
2513 
2514   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2515   ierr = PetscFree(work);CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatSOR_MPISBAIJ"
2521 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2522 {
2523   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2524   PetscErrorCode    ierr;
2525   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2526   PetscScalar       *x,*ptr,*from;
2527   Vec               bb1;
2528   const PetscScalar *b;
2529 
2530   PetscFunctionBegin;
2531   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2532   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2533 
2534   if (flag == SOR_APPLY_UPPER) {
2535     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2536     PetscFunctionReturn(0);
2537   }
2538 
2539   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2540     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2541       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2542       its--;
2543     }
2544 
2545     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
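    /* Each pass assembles bb1 = bb - (off-diagonal action of the full symmetric
       matrix on xx) with a single forward scatter: slvec0 = [-xx, -B^T*xx] is
       accumulated into slvec1 = [bb, 0], so slvec1a picks up the lower-triangular
       (B^T) contributions and slvec1b the ghosted values of -xx; B*slvec1b then
       adds the upper-triangular part before the local symmetric sweep on A. */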
2546     while (its--){
2547 
2548       /* lower triangular part: slvec0b = - B^T*xx */
2549       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2550 
2551       /* copy xx into slvec0a */
2552       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2553       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2554       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2555       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2556 
2557       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2558 
2559       /* copy bb into slvec1a */
2560       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2561       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2562       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2563       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2564 
2565       /* set slvec1b = 0 */
2566       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2567 
2568       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2569       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2570       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2571       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2572 
2573       /* upper triangular part: bb1 = bb1 - B*x */
2574       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2575 
2576       /* local diagonal sweep */
2577       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2578     }
2579     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2580   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2581     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2582   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2583     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2584   } else if (flag & SOR_EISENSTAT) {
2585     Vec               xx1;
2586     PetscTruth        hasop;
2587     const PetscScalar *diag;
2588     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2589     PetscInt          i,n;
2590 
2591     if (!mat->xx1) {
2592       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2593       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2594     }
2595     xx1 = mat->xx1;
2596     bb1 = mat->bb1;
2597 
2598     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2599 
2600     if (!mat->diag) {
      /* this is wrong if the same matrix is reused with new nonzero values */
2602       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2603       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2604     }
2605     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2606 
2607     if (hasop) {
2608       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2609       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2610     } else {
2611       /*
2612           These two lines are replaced by code that may be a bit faster for a good compiler
2613       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2614       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2615       */
2616       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2617       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2618       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2619       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2620       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
2632       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2633       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2634       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2635       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2636     }
2637 
2638     /* multiply off-diagonal portion of matrix */
2639     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2640     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2641     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2642     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2643     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2644     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2645     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2646     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2647     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2648     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2649 
2650     /* local sweep */
2651     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2652     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2653   } else {
2654     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2655   }
2656   PetscFunctionReturn(0);
2657 }
2658 
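/*
   MatSOR_MPISBAIJ_2comm - variant of MatSOR_MPISBAIJ that uses two scatters per
   iteration (a forward scatter for the ghost values of xx and a reverse scatter
   to return the B^T contributions) instead of the single combined slvec scatter;
   it is not wired into the MatOps table above.
*/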
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2661 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2662 {
2663   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2664   PetscErrorCode ierr;
2665   Vec            lvec1,bb1;
2666 
2667   PetscFunctionBegin;
2668   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2669   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2670 
2671   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2672     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2673       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2674       its--;
2675     }
2676 
2677     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2678     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2679     while (its--){
2680       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2681 
      /* lower triangular part: bb1 = bb - B^T*xx */
2683       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2684       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2685 
2686       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2688       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2689 
      /* upper triangular part: bb1 = bb1 - B*x */
2691       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2692       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2693 
2694       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2695 
2696       /* diagonal sweep */
2697       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2698     }
2699     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2700     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2701   } else {
2702     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2703   }
2704   PetscFunctionReturn(0);
2705 }
2706 
2707