xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6ba06ab77c48d2bbbcba7e829ffcd04868efd3d5)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscblaslapack.h"
7 
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 EXTERN_C_BEGIN
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
28 {
29   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
30   PetscErrorCode ierr;
31 
32   PetscFunctionBegin;
33   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
34   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN_C_BEGIN
40 #undef __FUNCT__
41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
43 {
44   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
49   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 
55 #define CHUNKSIZE  10
56 
57 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
58 { \
59  \
60     brow = row/bs;  \
61     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
62     rmax = aimax[brow]; nrow = ailen[brow]; \
63       bcol = col/bs; \
64       ridx = row % bs; cidx = col % bs; \
65       low = 0; high = nrow; \
66       while (high-low > 3) { \
67         t = (low+high)/2; \
68         if (rp[t] > bcol) high = t; \
69         else              low  = t; \
70       } \
71       for (_i=low; _i<high; _i++) { \
72         if (rp[_i] > bcol) break; \
73         if (rp[_i] == bcol) { \
74           bap  = ap +  bs2*_i + bs*cidx + ridx; \
75           if (addv == ADD_VALUES) *bap += value;  \
76           else                    *bap  = value;  \
77           goto a_noinsert; \
78         } \
79       } \
80       if (a->nonew == 1) goto a_noinsert; \
81       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
82       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
83       N = nrow++ - 1;  \
84       /* shift up all the later entries in this row */ \
85       for (ii=N; ii>=_i; ii--) { \
86         rp[ii+1] = rp[ii]; \
87         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
88       } \
89       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
90       rp[_i]                      = bcol;  \
91       ap[bs2*_i + bs*cidx + ridx] = value;  \
92       a_noinsert:; \
93     ailen[brow] = nrow; \
94 }
95 
96 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
97 { \
98     brow = row/bs;  \
99     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
100     rmax = bimax[brow]; nrow = bilen[brow]; \
101       bcol = col/bs; \
102       ridx = row % bs; cidx = col % bs; \
103       low = 0; high = nrow; \
104       while (high-low > 3) { \
105         t = (low+high)/2; \
106         if (rp[t] > bcol) high = t; \
107         else              low  = t; \
108       } \
109       for (_i=low; _i<high; _i++) { \
110         if (rp[_i] > bcol) break; \
111         if (rp[_i] == bcol) { \
112           bap  = ap +  bs2*_i + bs*cidx + ridx; \
113           if (addv == ADD_VALUES) *bap += value;  \
114           else                    *bap  = value;  \
115           goto b_noinsert; \
116         } \
117       } \
118       if (b->nonew == 1) goto b_noinsert; \
119       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
120       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
121       N = nrow++ - 1;  \
122       /* shift up all the later entries in this row */ \
123       for (ii=N; ii>=_i; ii--) { \
124         rp[ii+1] = rp[ii]; \
125         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
126       } \
127       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
128       rp[_i]                      = bcol;  \
129       ap[bs2*_i + bs*cidx + ridx] = value;  \
130       b_noinsert:; \
131     bilen[brow] = nrow; \
132 }
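/* Both macros above share one idiom: a truncated binary search narrows the
   search window to at most three block columns, a linear scan finishes the
   lookup, and a miss inserts the new block column by shifting the tail of the
   row up one slot.  A minimal standalone sketch of that idiom follows; the
   helper name and the use of a bare column array (instead of bs2-sized value
   blocks) are illustrative assumptions, not part of PETSc. */
#if 0
static PetscInt FindOrInsertBlockCol_Sketch(PetscInt *cols,PetscInt *len,PetscInt bcol)
{
  PetscInt low = 0,high = *len,t,i;
  while (high-low > 3) {                 /* truncated binary search */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {             /* finish with a linear scan */
    if (cols[i] == bcol) return i;       /* hit: caller adds to or overwrites the block */
    if (cols[i] > bcol) break;           /* miss: i is the insertion point */
  }
  for (t=*len-1; t>=i; t--) cols[t+1] = cols[t]; /* shift the tail up one slot */
  cols[i] = bcol; (*len)++;              /* record the new block column */
  return i;
}
#endif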
133 
134 /* Only add/insert a(i,j) with i<=j (blocks).
135    Any a(i,j) with i>j input by the user is ignored.
136 */
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSetValues_MPISBAIJ"
139 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
140 {
141   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
142   MatScalar      value;
143   PetscTruth     roworiented = baij->roworiented;
144   PetscErrorCode ierr;
145   PetscInt       i,j,row,col;
146   PetscInt       rstart_orig=mat->rmap->rstart;
147   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
148   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
149 
150   /* Some variables required by the macros */
151   Mat            A = baij->A;
152   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
153   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
154   MatScalar      *aa=a->a;
155 
156   Mat            B = baij->B;
157   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
158   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
159   MatScalar     *ba=b->a;
160 
161   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
162   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
163   MatScalar     *ap,*bap;
164 
165   /* for stash */
166   PetscInt      n_loc, *in_loc = PETSC_NULL;
167   MatScalar     *v_loc = PETSC_NULL;
168 
169   PetscFunctionBegin;
170   if (v) PetscValidScalarPointer(v,6);
171   if (!baij->donotstash){
172     if (n > baij->n_loc) {
173       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
174       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
176       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
177       baij->n_loc = n;
178     }
179     in_loc = baij->in_loc;
180     v_loc  = baij->v_loc;
181   }
182 
183   for (i=0; i<m; i++) {
184     if (im[i] < 0) continue;
185 #if defined(PETSC_USE_DEBUG)
186     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
187 #endif
188     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
189       row = im[i] - rstart_orig;              /* local row index */
190       for (j=0; j<n; j++) {
191         if (im[i]/bs > in[j]/bs){
192           if (a->ignore_ltriangular){
193             continue;    /* ignore lower triangular blocks */
194           } else {
195             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
196           }
197         }
198         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
199           col = in[j] - cstart_orig;          /* local col index */
200           brow = row/bs; bcol = col/bs;
201           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
202           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
203           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
204           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
205         } else if (in[j] < 0) continue;
206 #if defined(PETSC_USE_DEBUG)
207         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
208 #endif
209         else {  /* off-diag entry (B) */
210           if (mat->was_assembled) {
211             if (!baij->colmap) {
212               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
213             }
214 #if defined (PETSC_USE_CTABLE)
215             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
216             col  = col - 1;
217 #else
218             col = baij->colmap[in[j]/bs] - 1;
219 #endif
220             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
221               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
222               col =  in[j];
223               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
224               B = baij->B;
225               b = (Mat_SeqBAIJ*)(B)->data;
226               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
227               ba=b->a;
228             } else col += in[j]%bs;
229           } else col = in[j];
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         }
234       }
235     } else {  /* off processor entry */
236       if (!baij->donotstash) {
237         n_loc = 0;
238         for (j=0; j<n; j++){
239           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
240           in_loc[n_loc] = in[j];
241           if (roworiented) {
242             v_loc[n_loc] = v[i*n+j];
243           } else {
244             v_loc[n_loc] = v[j*m+i];
245           }
246           n_loc++;
247         }
248         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
249       }
250     }
251   }
252   PetscFunctionReturn(0);
253 }
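/* A usage sketch (hypothetical caller, not part of this file): because the
   routine above keeps only blocks with i <= j, a symmetric matrix is assembled
   by passing each off-diagonal entry once, from the upper triangle.  The
   variable names and indices below are illustrative assumptions. */
#if 0
{
  Mat            sbaij;         /* assumed created as MATMPISBAIJ with bs = 1 */
  PetscInt       i = 3,j = 5;
  PetscScalar    v = 2.0;
  PetscErrorCode ierr;
  ierr = MatSetOption(sbaij,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(sbaij,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* upper entry: stored */
  ierr = MatSetValues(sbaij,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* lower entry: ignored */
  ierr = MatAssemblyBegin(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif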
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
260   const MatScalar *value;
261   MatScalar       *barray=baij->barray;
262   PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
263   PetscErrorCode  ierr;
264   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
265   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
266   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
267 
268   PetscFunctionBegin;
269   if(!barray) {
270     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
271     baij->barray = barray;
272   }
273 
274   if (roworiented) {
275     stepval = (n-1)*bs;
276   } else {
277     stepval = (m-1)*bs;
278   }
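  /* stepval is the number of scalars skipped between consecutive rows of one
     bs x bs block inside the caller's (m*bs) x (n*bs) input array v */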
279   for (i=0; i<m; i++) {
280     if (im[i] < 0) continue;
281 #if defined(PETSC_USE_DEBUG)
282     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
283 #endif
284     if (im[i] >= rstart && im[i] < rend) {
285       row = im[i] - rstart;
286       for (j=0; j<n; j++) {
287         if (im[i] > in[j]) {
288           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
289           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
290         }
291         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, no copy is required */
292         if ((roworiented) && (n == 1)) {
293           barray = (MatScalar*) v + i*bs2;
294         } else if((!roworiented) && (m == 1)) {
295           barray = (MatScalar*) v + j*bs2;
296         } else { /* Here a copy is required */
297           if (roworiented) {
298             value = v + i*(stepval+bs)*bs + j*bs;
299           } else {
300             value = v + j*(stepval+bs)*bs + i*bs;
301           }
302           for (ii=0; ii<bs; ii++,value+=stepval) {
303             for (jj=0; jj<bs; jj++) {
304               *barray++  = *value++;
305             }
306           }
307           barray -=bs2;
308         }
309 
310         if (in[j] >= cstart && in[j] < cend){
311           col  = in[j] - cstart;
312           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
313         }
314         else if (in[j] < 0) continue;
315 #if defined(PETSC_USE_DEBUG)
316         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
317 #endif
318         else {
319           if (mat->was_assembled) {
320             if (!baij->colmap) {
321               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
322             }
323 
324 #if defined(PETSC_USE_DEBUG)
325 #if defined (PETSC_USE_CTABLE)
326             { PetscInt data;
327               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
328               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
329             }
330 #else
331             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
332 #endif
333 #endif
334 #if defined (PETSC_USE_CTABLE)
335 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
336             col  = (col - 1)/bs;
337 #else
338             col = (baij->colmap[in[j]] - 1)/bs;
339 #endif
340             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
341               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
342               col =  in[j];
343             }
344           }
345           else col = in[j];
346           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
347         }
348       }
349     } else {
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
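      /* the stored B blocks appear twice in the symmetric matrix (once
         transposed on another process), hence norm^2 = sum(|A|_F^2) + 2*sum(|B|_F^2) */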
426       *norm = sqrt(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
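      /* each stored block adds |v| to the column sums of its block column and,
         standing in for its unstored transpose, to those of its block row, so
         rsum[] holds full column sums; by symmetry NORM_1 equals NORM_INFINITY */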
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscTruth     other_disassembled;
522   PetscMPIInt    n;
523   PetscTruth     r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        clear the roworiented flags before calling MatSetValues(), and afterwards
548        restore the original flags */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled; if so, we must
578      also disassemble ourselves, in order that we may reassemble. */
579   /*
580      if the nonzero structure of submatrix B cannot change then we know that
581      no processor has disassembled, so we can skip this step
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscTruth        iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
626               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
627               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
632       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscTruth isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
672 
673     /* copy over the A part */
674     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
675     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
676     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
677 
678     for (i=0; i<mbs; i++) {
679       rvals[0] = bs*(baij->rstartbs + i);
680       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
681       for (j=ai[i]; j<ai[i+1]; j++) {
682         col = (baij->cstartbs+aj[j])*bs;
683         for (k=0; k<bs; k++) {
684           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
685           col++; a += bs;
686         }
687       }
688     }
689     /* copy over the B part */
690     Bloc = (Mat_SeqBAIJ*)baij->B->data;
691     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
692     for (i=0; i<mbs; i++) {
693 
694       rvals[0] = bs*(baij->rstartbs + i);
695       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
696       for (j=ai[i]; j<ai[i+1]; j++) {
697         col = baij->garray[aj[j]]*bs;
698         for (k=0; k<bs; k++) {
699           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
700           col++; a += bs;
701         }
702       }
703     }
704     ierr = PetscFree(rvals);CHKERRQ(ierr);
705     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     /*
708        Everyone has to call to draw the matrix since the graphics waits are
709        synchronized across all processors that share the PetscDraw object
710     */
711     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
712     if (!rank) {
713       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
714       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
715     }
716     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
717     ierr = MatDestroy(A);CHKERRQ(ierr);
718   }
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatView_MPISBAIJ"
724 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
725 {
726   PetscErrorCode ierr;
727   PetscTruth     iascii,isdraw,issocket,isbinary;
728 
729   PetscFunctionBegin;
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
734   if (iascii || isdraw || issocket || isbinary) {
735     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
736   } else {
737     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPISBAIJ"
744 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
752 #endif
753   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
754   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
755   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
756   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
757 #if defined (PETSC_USE_CTABLE)
758   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
759 #else
760   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
761 #endif
762   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
763   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
764   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
765   if (baij->slvec0) {
766     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
767     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
768   }
769   if (baij->slvec1) {
770     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
771     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
772     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
773   }
774   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
779   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
780   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
781 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(baij);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* superdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
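/* Both MatMult routines above form y = (A + B + B^T) x with one communication
   phase.  Schematically (pseudocode, using the split work vectors):

     slvec1a = A x                          local symmetric diagonal block
     slvec0  = [ x ; B^T x ]                local values plus transpose part
     slvec1 += scatter_add(slvec0, sMvctx)  single forward scatter
     y       = B slvec1b + slvec1a          apply B to the gathered ghosts

   MatMultAdd_MPISBAIJ below follows the same pattern with yy added in. */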
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do superdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
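/* The _2comm variants trade the single combined scatter on sMvctx used above
   for two communication rounds on Mvctx: a forward scatter of x (to apply B to
   the ghost values) and a reverse ADD_VALUES scatter (to return the B^T x
   contributions to their owners). */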
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do superdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
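      /* merge order: ghost columns of B whose global block column lies below
         cstart come first, then all of A, then the remaining columns of B;
         cmap[] (= garray) maps B's local block columns back to global ones */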
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SYMMETRIC:
1226     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1227     break;
1228   case MAT_STRUCTURALLY_SYMMETRIC:
1229     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1230     break;
1231   case MAT_SYMMETRY_ETERNAL:
1232     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1233     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1234     break;
1235   case MAT_IGNORE_LOWER_TRIANGULAR:
1236     aA->ignore_ltriangular = flg;
1237     break;
1238   case MAT_ERROR_LOWER_TRIANGULAR:
1239     aA->ignore_ltriangular = flg;
1240     break;
1241   case MAT_GETROW_UPPERTRIANGULAR:
1242     aA->getrow_utriangular = flg;
1243     break;
1244   default:
1245     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1252 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1253 {
1254   PetscErrorCode ierr;
1255   PetscFunctionBegin;
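  /* an SBAIJ matrix is symmetric by construction, so its transpose is just a copy */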
1256   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1257     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1264 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1265 {
1266   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1267   Mat            a=baij->A, b=baij->B;
1268   PetscErrorCode ierr;
1269   PetscInt       nv,m,n;
1270   PetscTruth     flg;
1271 
1272   PetscFunctionBegin;
1273   if (ll != rr){
1274     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1275     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1276   }
1277   if (!ll) PetscFunctionReturn(0);
1278 
1279   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1280   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be equal",m,n);
1281 
1282   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1283   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1284 
1285   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1286 
1287   /* left diagonalscale the off-diagonal part */
1288   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1289 
1290   /* scale the diagonal part */
1291   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1292 
1293   /* right diagonalscale the off-diagonal part */
1294   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1295   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1301 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1302 {
1303   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1304   PetscErrorCode ierr;
1305 
1306   PetscFunctionBegin;
1307   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatEqual_MPISBAIJ"
1315 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1316 {
1317   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1318   Mat            a,b,c,d;
1319   PetscTruth     flg;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   a = matA->A; b = matA->B;
1324   c = matB->A; d = matB->B;
1325 
1326   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1327   if (flg) {
1328     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1329   }
1330   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatCopy_MPISBAIJ"
1336 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1337 {
1338   PetscErrorCode ierr;
1339   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1340   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1341 
1342   PetscFunctionBegin;
1343   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1344   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1345     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1346     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1347     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1348   } else {
1349     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1350     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1357 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1358 {
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1368 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1369 {
1370   PetscErrorCode ierr;
1371   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1372   PetscBLASInt   bnz,one=1;
1373   Mat_SeqSBAIJ   *xa,*ya;
1374   Mat_SeqBAIJ    *xb,*yb;
1375 
1376   PetscFunctionBegin;
1377   if (str == SAME_NONZERO_PATTERN) {
1378     PetscScalar alpha = a;
1379     xa = (Mat_SeqSBAIJ *)xx->A->data;
1380     ya = (Mat_SeqSBAIJ *)yy->A->data;
1381     bnz = PetscBLASIntCast(xa->nz);
1382     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1383     xb = (Mat_SeqBAIJ *)xx->B->data;
1384     yb = (Mat_SeqBAIJ *)yy->B->data;
1385     bnz = PetscBLASIntCast(xb->nz);
1386     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1387   } else {
1388     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1389     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1390     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1397 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1398 {
1399   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1400   PetscInt        rbs,cbs;
1401   PetscErrorCode  ierr;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1405   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1406   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1407   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1408   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with row layout block size %d",bs,rbs);
1409   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d incompatible with column layout block size %d",bs,cbs);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1415 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1416 {
1417   PetscErrorCode ierr;
1418   PetscInt       i;
1419   PetscTruth     flg;
1420 
1421   PetscFunctionBegin;
1422   for (i=0; i<n; i++) {
1423     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1424     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1425   }
1426   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 
1431 /* -------------------------------------------------------------------*/
1432 static struct _MatOps MatOps_Values = {
1433        MatSetValues_MPISBAIJ,
1434        MatGetRow_MPISBAIJ,
1435        MatRestoreRow_MPISBAIJ,
1436        MatMult_MPISBAIJ,
1437 /* 4*/ MatMultAdd_MPISBAIJ,
1438        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones: the matrix is symmetric */
1439        MatMultAdd_MPISBAIJ,
1440        0,
1441        0,
1442        0,
1443 /*10*/ 0,
1444        0,
1445        0,
1446        MatSOR_MPISBAIJ,
1447        MatTranspose_MPISBAIJ,
1448 /*15*/ MatGetInfo_MPISBAIJ,
1449        MatEqual_MPISBAIJ,
1450        MatGetDiagonal_MPISBAIJ,
1451        MatDiagonalScale_MPISBAIJ,
1452        MatNorm_MPISBAIJ,
1453 /*20*/ MatAssemblyBegin_MPISBAIJ,
1454        MatAssemblyEnd_MPISBAIJ,
1455        MatSetOption_MPISBAIJ,
1456        MatZeroEntries_MPISBAIJ,
1457 /*24*/ 0,
1458        0,
1459        0,
1460        0,
1461        0,
1462 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1463        0,
1464        0,
1465        0,
1466        0,
1467 /*34*/ MatDuplicate_MPISBAIJ,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*39*/ MatAXPY_MPISBAIJ,
1473        MatGetSubMatrices_MPISBAIJ,
1474        MatIncreaseOverlap_MPISBAIJ,
1475        MatGetValues_MPISBAIJ,
1476        MatCopy_MPISBAIJ,
1477 /*44*/ 0,
1478        MatScale_MPISBAIJ,
1479        0,
1480        0,
1481        0,
1482 /*49*/ MatSetBlockSize_MPISBAIJ,
1483        0,
1484        0,
1485        0,
1486        0,
1487 /*54*/ 0,
1488        0,
1489        MatSetUnfactored_MPISBAIJ,
1490        0,
1491        MatSetValuesBlocked_MPISBAIJ,
1492 /*59*/ 0,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*64*/ 0,
1498        0,
1499        0,
1500        0,
1501        0,
1502 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*74*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*79*/ 0,
1513        0,
1514        0,
1515        0,
1516        MatLoad_MPISBAIJ,
1517 /*84*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*89*/ 0,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*94*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*99*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*104*/0,
1538        MatRealPart_MPISBAIJ,
1539        MatImaginaryPart_MPISBAIJ,
1540        MatGetRowUpperTriangular_MPISBAIJ,
1541        MatRestoreRowUpperTriangular_MPISBAIJ
1542 };
1543 
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1548 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1549 {
1550   PetscFunctionBegin;
1551   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1552   *iscopy = PETSC_FALSE;
1553   PetscFunctionReturn(0);
1554 }
1555 EXTERN_C_END
1556 
1557 EXTERN_C_BEGIN
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1560 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1561 {
1562   Mat_MPISBAIJ   *b;
1563   PetscErrorCode ierr;
1564   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1565 
1566   PetscFunctionBegin;
1567   if (bs < 0){
1568     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1569       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1570     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1571     bs   = PetscAbs(bs);
1572   }
1573   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1574   bs = newbs;
1575 
1576   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1577   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1578   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1579   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1580 
1581   B->rmap->bs = B->cmap->bs = bs;
1582   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1583   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1584 
1585   if (d_nnz) {
1586     for (i=0; i<B->rmap->n/bs; i++) {
1587       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1588     }
1589   }
1590   if (o_nnz) {
1591     for (i=0; i<B->rmap->n/bs; i++) {
1592       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1593     }
1594   }
1595 
1596   b   = (Mat_MPISBAIJ*)B->data;
1597   mbs = B->rmap->n/bs;
1598   Mbs = B->rmap->N/bs;
1599   if (mbs*bs != B->rmap->n) {
1600     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1601   }
1602 
1603   B->rmap->bs  = bs;
1604   b->bs2 = bs*bs;
1605   b->mbs = mbs;
1606   b->nbs = mbs;
1607   b->Mbs = Mbs;
1608   b->Nbs = Mbs;
1609 
1610   for (i=0; i<=b->size; i++) {
1611     b->rangebs[i] = B->rmap->range[i]/bs;
1612   }
1613   b->rstartbs = B->rmap->rstart/bs;
1614   b->rendbs   = B->rmap->rend/bs;
1615 
1616   b->cstartbs = B->cmap->rstart/bs;
1617   b->cendbs   = B->cmap->rend/bs;
1618 
1619   if (!B->preallocated) {
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1622     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1624     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1625     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1626     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1627     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1628     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1629   }
1630 
1631   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1632   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1633   B->preallocated = PETSC_TRUE;
1634   PetscFunctionReturn(0);
1635 }
1636 EXTERN_C_END
1637 
1638 EXTERN_C_BEGIN
1639 #if defined(PETSC_HAVE_MUMPS)
1640 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1641 #endif
1642 #if defined(PETSC_HAVE_SPOOLES)
1643 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1644 #endif
1645 #if defined(PETSC_HAVE_PASTIX)
1646 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1647 #endif
1648 EXTERN_C_END
1649 
1650 /*MC
1651    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1652    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1653 
1654    Options Database Keys:
1655 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1656 
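  Notes:
  A minimal creation sketch (the sizes and preallocation parameters are illustrative):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
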
1657   Level: beginner
1658 
1659 .seealso: MatCreateMPISBAIJ
1660 M*/
1661 
1662 EXTERN_C_BEGIN
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatCreate_MPISBAIJ"
1665 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1666 {
1667   Mat_MPISBAIJ   *b;
1668   PetscErrorCode ierr;
1669   PetscTruth     flg;
1670 
1671   PetscFunctionBegin;
1672 
1673   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1674   B->data = (void*)b;
1675   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1676 
1677   B->ops->destroy    = MatDestroy_MPISBAIJ;
1678   B->ops->view       = MatView_MPISBAIJ;
1679   B->mapping         = 0;
1680   B->assembled       = PETSC_FALSE;
1681 
1682   B->insertmode = NOT_SET_VALUES;
1683   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1684   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1685 
1686   /* build local table of row and column ownerships */
1687   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1688 
1689   /* build cache for off-process entries formed during assembly */
1690   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1691   b->donotstash  = PETSC_FALSE;
1692   b->colmap      = PETSC_NULL;
1693   b->garray      = PETSC_NULL;
1694   b->roworiented = PETSC_TRUE;
1695 
1696   /* stuff used in block assembly */
1697   b->barray       = 0;
1698 
1699   /* stuff used for matrix vector multiply */
1700   b->lvec         = 0;
1701   b->Mvctx        = 0;
1702   b->slvec0       = 0;
1703   b->slvec0b      = 0;
1704   b->slvec1       = 0;
1705   b->slvec1a      = 0;
1706   b->slvec1b      = 0;
1707   b->sMvctx       = 0;
1708 
1709   /* stuff for MatGetRow() */
1710   b->rowindices   = 0;
1711   b->rowvalues    = 0;
1712   b->getrowactive = PETSC_FALSE;
1713 
1714   /* hash table stuff */
1715   b->ht           = 0;
1716   b->hd           = 0;
1717   b->ht_size      = 0;
1718   b->ht_flag      = PETSC_FALSE;
1719   b->ht_fact      = 0;
1720   b->ht_total_ct  = 0;
1721   b->ht_insert_ct = 0;
1722 
1723   b->in_loc       = 0;
1724   b->v_loc        = 0;
1725   b->n_loc        = 0;
1726   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1727     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1728     if (flg) {
1729       PetscReal fact = 1.39;
1730       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1731       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1732       if (fact <= 1.0) fact = 1.39;
1733       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1734       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1735     }
1736   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1737 
1738 #if defined(PETSC_HAVE_PASTIX)
1739   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1740 					   "MatGetFactor_mpisbaij_pastix",
1741 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1742 #endif
1743 #if defined(PETSC_HAVE_MUMPS)
1744   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1745                                      "MatGetFactor_sbaij_mumps",
1746                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1747 #endif
1748 #if defined(PETSC_HAVE_SPOOLES)
1749   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1750                                      "MatGetFactor_mpisbaij_spooles",
1751                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1752 #endif
1753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1754                                      "MatStoreValues_MPISBAIJ",
1755                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1757                                      "MatRetrieveValues_MPISBAIJ",
1758                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1760                                      "MatGetDiagonalBlock_MPISBAIJ",
1761                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1763                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1764                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1765   B->symmetric                  = PETSC_TRUE;
1766   B->structurally_symmetric     = PETSC_TRUE;
1767   B->symmetric_set              = PETSC_TRUE;
1768   B->structurally_symmetric_set = PETSC_TRUE;
1769   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1770   PetscFunctionReturn(0);
1771 }
1772 EXTERN_C_END
1773 
1774 /*MC
1775    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1776 
1777    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1778    and MATMPISBAIJ otherwise.
1779 
1780    Options Database Keys:
1781 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1782 
1783   Level: beginner
1784 
1785 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1786 M*/
1787 
1788 EXTERN_C_BEGIN
1789 #undef __FUNCT__
1790 #define __FUNCT__ "MatCreate_SBAIJ"
1791 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1792 {
1793   PetscErrorCode ierr;
1794   PetscMPIInt    size;
1795 
1796   PetscFunctionBegin;
1797   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1798   if (size == 1) {
1799     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1800   } else {
1801     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1802   }
1803   PetscFunctionReturn(0);
1804 }
1805 EXTERN_C_END
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1809 /*@C
1810    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1811    the user should preallocate the matrix storage by setting the parameters
1812    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1813    performance can be increased by more than a factor of 50.
1814 
1815    Collective on Mat
1816 
1817    Input Parameters:
1818 +  A - the matrix
1819 .  bs   - size of block
1820 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1821            submatrix  (same for all local rows)
1822 .  d_nnz - array containing the number of block nonzeros in the upper triangular and
1823            diagonal part of each block row of the diagonal portion of the local
1824            submatrix (possibly different for each block row) or PETSC_NULL.  You must
1825            leave room for the diagonal entry even if it is zero.
1826 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1827            submatrix (same for all local rows).
1828 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1829            off-diagonal portion of the local submatrix (possibly different for
1830            each block row) or PETSC_NULL.
1831 
1832 
1833    Options Database Keys:
1834 +   -mat_no_unroll - uses code that does not unroll the loops in the
1835                      block calculations (much slower)
1836 -   -mat_block_size - size of the blocks to use
1837 
1838    Notes:
1839 
1840    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1841    then it must be used on all processors that share the object for that argument.
1842 
1843    If the *_nnz parameter is given then the *_nz parameter is ignored
1844 
1845    Storage Information:
1846    For a square global matrix we define each processor's diagonal portion
1847    to be its local rows and the corresponding columns (a square submatrix);
1848    each processor's off-diagonal portion encompasses the remainder of the
1849    local matrix (a rectangular submatrix).
1850 
1851    The user can specify preallocated storage for the diagonal part of
1852    the local submatrix with either d_nz or d_nnz (not both).  Set
1853    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1854    memory allocation.  Likewise, specify preallocated storage for the
1855    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1856 
1857    You can call MatGetInfo() to get information on how effective the preallocation was;
1858    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1859    You can also run with the option -info and look for messages with the string
1860    malloc in them to see if additional memory allocation was needed.
1861 
1862    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1863    the figure below we depict these three local rows and all columns (0-11).
1864 
1865 .vb
1866            0 1 2 3 4 5 6 7 8 9 10 11
1867           -------------------
1868    row 3  |  o o o d d d o o o o o o
1869    row 4  |  o o o d d d o o o o o o
1870    row 5  |  o o o d d d o o o o o o
1871           -------------------
1872 .ve
1873 
1874    Thus, any entries in the d locations are stored in the d (diagonal)
1875    submatrix, and any entries in the o locations are stored in the
1876    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1877    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1878 
1879    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1880    plus the diagonal part of the d matrix,
1881    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1882    In general, for PDE problems in which most nonzeros are near the diagonal,
1883    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1884    or you will get TERRIBLE performance; see the users' manual chapter on
1885    matrices.
1886 
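
   For instance, with bs=1 the process owning rows 3, 4 and 5 above could preallocate as in
   the following sketch; the counts are read off the figure, remembering that entries below
   the diagonal are not stored:
.vb
   PetscInt d_nnz[] = {3,2,1};   /* upper triangle of the d block, diagonal included */
   PetscInt o_nnz[] = {6,6,6};   /* stored o entries are columns 6-11 only */
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve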
1887    Level: intermediate
1888 
1889 .keywords: matrix, block, aij, compressed row, sparse, parallel
1890 
1891 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1892 @*/
1893 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1894 {
1895   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1896 
1897   PetscFunctionBegin;
1898   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
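  /* the composed function exists only for MATMPISBAIJ matrices; for any other type f is
     PETSC_NULL and this routine is a no-op */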
1899   if (f) {
1900     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1901   }
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatCreateMPISBAIJ"
1907 /*@C
1908    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1909    (block compressed row).  For good matrix assembly performance
1910    the user should preallocate the matrix storage by setting the parameters
1911    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1912    performance can be increased by more than a factor of 50.
1913 
1914    Collective on MPI_Comm
1915 
1916    Input Parameters:
1917 +  comm - MPI communicator
1918 .  bs   - size of block
1919 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1920            This value should be the same as the local size used in creating the
1921            y vector for the matrix-vector product y = Ax.
1922 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1923            This value should be the same as the local size used in creating the
1924            x vector for the matrix-vector product y = Ax.
1925 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1926 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1927 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1928            submatrix  (same for all local rows)
1929 .  d_nnz - array containing the number of block nonzeros in the upper triangular
1930            portion of each block row of the diagonal portion of the local submatrix
1931            (possibly different for each block row) or PETSC_NULL.
1932            You must leave room for the diagonal entry even if it is zero.
1933 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1934            submatrix (same for all local rows).
1935 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1936            off-diagonal portion of the local submatrix (possibly different for
1937            each block row) or PETSC_NULL.
1938 
1939    Output Parameter:
1940 .  A - the matrix
1941 
1942    Options Database Keys:
1943 +   -mat_no_unroll - uses code that does not unroll the loops in the
1944                      block calculations (much slower)
1945 .   -mat_block_size - size of the blocks to use
1946 -   -mat_mpi - use the parallel matrix data structures even on one processor
1947                (defaults to using SeqBAIJ format on one processor)
1948 
1949    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1950    MatXXXXSetPreallocation() paradigm instead of this routine directly, as sketched below.
1951    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
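
   A sketch of that paradigm (sizes illustrative; each preallocation call takes effect
   only when the matrix ends up being of the matching type):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);CHKERRQ(ierr);
.ve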
1952 
1953    Notes:
1954    The number of rows and columns must be divisible by blocksize.
1955    This matrix type does not support complex Hermitian operations.
1956 
1957    The user MUST specify either the local or global matrix dimensions
1958    (possibly both).
1959 
1960    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1961    then it must be used on all processors that share the object for that argument.
1962 
1963    If the *_nnz parameter is given then the *_nz parameter is ignored
1964 
1965    Storage Information:
1966    For a square global matrix we define each processor's diagonal portion
1967    to be its local rows and the corresponding columns (a square submatrix);
1968    each processor's off-diagonal portion encompasses the remainder of the
1969    local matrix (a rectangular submatrix).
1970 
1971    The user can specify preallocated storage for the diagonal part of
1972    the local submatrix with either d_nz or d_nnz (not both).  Set
1973    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1974    memory allocation.  Likewise, specify preallocated storage for the
1975    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1976 
1977    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1978    the figure below we depict these three local rows and all columns (0-11).
1979 
1980 .vb
1981            0 1 2 3 4 5 6 7 8 9 10 11
1982           -------------------
1983    row 3  |  o o o d d d o o o o o o
1984    row 4  |  o o o d d d o o o o o o
1985    row 5  |  o o o d d d o o o o o o
1986           -------------------
1987 .ve
1988 
1989    Thus, any entries in the d locations are stored in the d (diagonal)
1990    submatrix, and any entries in the o locations are stored in the
1991    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1992    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1993 
1994    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1995    plus the diagonal part of the d matrix,
1996    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1997    In general, for PDE problems in which most nonzeros are near the diagonal,
1998    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1999    or you will get TERRIBLE performance; see the users' manual chapter on
2000    matrices.
2001 
2002    Level: intermediate
2003 
2004 .keywords: matrix, block, aij, compressed row, sparse, parallel
2005 
2006 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2007 @*/
2008 
2009 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2010 {
2011   PetscErrorCode ierr;
2012   PetscMPIInt    size;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2016   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2017   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2018   if (size > 1) {
2019     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2020     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2021   } else {
2022     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2023     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2031 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2032 {
2033   Mat            mat;
2034   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2035   PetscErrorCode ierr;
2036   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2037   PetscScalar    *array;
2038 
2039   PetscFunctionBegin;
2040   *newmat       = 0;
2041   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2042   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2043   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2044   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2045   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2046   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2047 
2048   mat->factortype   = matin->factortype;
2049   mat->preallocated = PETSC_TRUE;
2050   mat->assembled    = PETSC_TRUE;
2051   mat->insertmode   = NOT_SET_VALUES;
2052 
2053   a = (Mat_MPISBAIJ*)mat->data;
2054   a->bs2   = oldmat->bs2;
2055   a->mbs   = oldmat->mbs;
2056   a->nbs   = oldmat->nbs;
2057   a->Mbs   = oldmat->Mbs;
2058   a->Nbs   = oldmat->Nbs;
2059 
2060 
2061   a->size         = oldmat->size;
2062   a->rank         = oldmat->rank;
2063   a->donotstash   = oldmat->donotstash;
2064   a->roworiented  = oldmat->roworiented;
2065   a->rowindices   = 0;
2066   a->rowvalues    = 0;
2067   a->getrowactive = PETSC_FALSE;
2068   a->barray       = 0;
2069   a->rstartbs    = oldmat->rstartbs;
2070   a->rendbs      = oldmat->rendbs;
2071   a->cstartbs    = oldmat->cstartbs;
2072   a->cendbs      = oldmat->cendbs;
2073 
2074   /* hash table stuff */
2075   a->ht           = 0;
2076   a->hd           = 0;
2077   a->ht_size      = 0;
2078   a->ht_flag      = oldmat->ht_flag;
2079   a->ht_fact      = oldmat->ht_fact;
2080   a->ht_total_ct  = 0;
2081   a->ht_insert_ct = 0;
2082 
2083   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2084   if (oldmat->colmap) {
2085 #if defined (PETSC_USE_CTABLE)
2086     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2087 #else
2088     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2089     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2090     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2091 #endif
2092   } else a->colmap = 0;
2093 
2094   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2095     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2096     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2097     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2098   } else a->garray = 0;
2099 
2100   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2101   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2102   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2103   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2104   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2105 
2106   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2107   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2108   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2109   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2110 
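  /* slvec1a, slvec1b and slvec0b are views into segments of the arrays of slvec1 and
     slvec0; no values are copied */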
2111   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2112   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2113   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2114   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2115   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2116   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2117   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2118   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2124 
2125   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2126   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2127   a->sMvctx = oldmat->sMvctx;
2128   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2129 
2130   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2132   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2134   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2135   *newmat = mat;
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatLoad_MPISBAIJ"
2141 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2142 {
2143   Mat            A;
2144   PetscErrorCode ierr;
2145   PetscInt       i,nz,j,rstart,rend;
2146   PetscScalar    *vals,*buf;
2147   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2148   MPI_Status     status;
2149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2150   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2151   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2152   PetscInt       bs=1,Mbs,mbs,extra_rows;
2153   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2154   PetscInt       dcount,kmax,k,nzcount,tmp;
2155   int            fd;
2156 
2157   PetscFunctionBegin;
2158   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2159     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2160   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2161 
2162   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2163   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2164   if (!rank) {
2165     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2166     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2167     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2168     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2169   }
2170 
2171   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2172   M = header[1]; N = header[2];
2173 
2174   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2175 
2176   /*
2177      This code adds extra rows to make sure the number of rows is
2178      divisible by the blocksize
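     (e.g. M=10 with bs=4 gives Mbs=3 and extra_rows=2; the padding rows receive a unit diagonal)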
2179   */
2180   Mbs        = M/bs;
2181   extra_rows = bs - M + bs*(Mbs);
2182   if (extra_rows == bs) extra_rows = 0;
2183   else                  Mbs++;
2184   if (extra_rows && !rank) {
2185     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2186   }
2187 
2188   /* determine ownership of all rows */
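  /* block rows are dealt out as evenly as possible: e.g. Mbs=10 on 4 processes gives mbs = 3,3,2,2 */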
2189   mbs        = Mbs/size + ((Mbs % size) > rank);
2190   m          = mbs*bs;
2191   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2192   mmbs       = PetscMPIIntCast(mbs);
2193   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2194   rowners[0] = 0;
2195   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2196   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2197   rstart = rowners[rank];
2198   rend   = rowners[rank+1];
2199 
2200   /* distribute row lengths to all processors */
2201   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2202   if (!rank) {
2203     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2204     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2205     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2206     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2207     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2208     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2209     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2210   } else {
2211     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2212   }
2213 
2214   if (!rank) {   /* procs[0] */
2215     /* calculate the number of nonzeros on each processor */
2216     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2217     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2218     for (i=0; i<size; i++) {
2219       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2220         procsnz[i] += rowlengths[j];
2221       }
2222     }
2223     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2224 
2225     /* determine max buffer needed and allocate it */
2226     maxnz = 0;
2227     for (i=0; i<size; i++) {
2228       maxnz = PetscMax(maxnz,procsnz[i]);
2229     }
2230     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2231 
2232     /* read in my part of the matrix column indices  */
2233     nz     = procsnz[0];
2234     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2235     mycols = ibuf;
2236     if (size == 1)  nz -= extra_rows;
2237     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2238     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2239 
2240     /* read in every other proc's column indices (except the last proc's) and ship them off */
2241     for (i=1; i<size-1; i++) {
2242       nz   = procsnz[i];
2243       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2244       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2245     }
2246     /* read in the stuff for the last proc */
2247     if (size != 1) {
2248       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2249       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2250       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2251       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2252     }
2253     ierr = PetscFree(cols);CHKERRQ(ierr);
2254   } else {  /* procs[i], i>0 */
2255     /* determine buffer space needed for message */
2256     nz = 0;
2257     for (i=0; i<m; i++) {
2258       nz += locrowlens[i];
2259     }
2260     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2261     mycols = ibuf;
2262     /* receive message of column indices*/
2263     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2264     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2265     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2266   }
2267 
2268   /* loop over local rows, determining number of off diagonal entries */
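  /* mask[] flags the block columns already counted in the current block row; masked1[] and
     masked2[] record which diagonal/off-diagonal flags were set so only those are cleared */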
2269   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2270   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2271   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2272   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2273   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2274   rowcount = 0;
2275   nzcount  = 0;
2276   for (i=0; i<mbs; i++) {
2277     dcount  = 0;
2278     odcount = 0;
2279     for (j=0; j<bs; j++) {
2280       kmax = locrowlens[rowcount];
2281       for (k=0; k<kmax; k++) {
2282         tmp = mycols[nzcount++]/bs; /* block col. index */
2283         if (!mask[tmp]) {
2284           mask[tmp] = 1;
2285           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2286           else masked1[dcount++] = tmp; /* entry in diag portion */
2287         }
2288       }
2289       rowcount++;
2290     }
2291 
2292     dlens[i]  = dcount;  /* d_nzz[i] */
2293     odlens[i] = odcount; /* o_nzz[i] */
2294 
2295     /* zero out the mask elements we set */
2296     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2297     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2298   }
2299 
2300   /* create our matrix */
2301   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2302   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2303   ierr = MatSetType(A,type);CHKERRQ(ierr);
2304   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2305   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2306 
2307   if (!rank) {
2308     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2309     /* read in my part of the matrix numerical values  */
2310     nz = procsnz[0];
2311     vals = buf;
2312     mycols = ibuf;
2313     if (size == 1)  nz -= extra_rows;
2314     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2315     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2316 
2317     /* insert into matrix */
2318     jj      = rstart*bs;
2319     for (i=0; i<m; i++) {
2320       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2321       mycols += locrowlens[i];
2322       vals   += locrowlens[i];
2323       jj++;
2324     }
2325 
2326     /* read in other processors (except the last one) and ship out */
2327     for (i=1; i<size-1; i++) {
2328       nz   = procsnz[i];
2329       vals = buf;
2330       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2331       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2332     }
2333     /* the last proc */
2334     if (size != 1) {
2335       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2336       vals = buf;
2337       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2338       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2339       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2340     }
2341     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2342 
2343   } else {
2344     /* receive numeric values */
2345     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2346 
2347     /* receive message of values*/
2348     vals   = buf;
2349     mycols = ibuf;
2350     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2351     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2352     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2353 
2354     /* insert into matrix */
2355     jj      = rstart*bs;
2356     for (i=0; i<m; i++) {
2357       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2358       mycols += locrowlens[i];
2359       vals   += locrowlens[i];
2360       jj++;
2361     }
2362   }
2363 
2364   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2365   ierr = PetscFree(buf);CHKERRQ(ierr);
2366   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2367   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2368   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2369   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2370   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   *newmat = A;
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2378 /*XXXXX@
2379    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2380 
2381    Input Parameters:
2382 .  mat  - the matrix
2383 .  fact - factor
2384 
2385    Collective on Mat
2386 
2387    Level: advanced
2388 
2389   Notes:
2390    This can also be set by the command line option: -mat_use_hash_table fact
2391 
2392 .keywords: matrix, hashtable, factor, HT
2393 
2394 .seealso: MatSetOption()
2395 @XXXXX*/
2396 
2397 
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2400 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2401 {
2402   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2403   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2404   PetscReal      atmp;
2405   PetscReal      *work,*svalues,*rvalues;
2406   PetscErrorCode ierr;
2407   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2408   PetscMPIInt    rank,size;
2409   PetscInt       *rowners_bs,dest,count,source;
2410   PetscScalar    *va;
2411   MatScalar      *ba;
2412   MPI_Status     stat;
2413 
2414   PetscFunctionBegin;
2415   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2416   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2417   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2418 
2419   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2420   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2421 
2422   bs   = A->rmap->bs;
2423   mbs  = a->mbs;
2424   Mbs  = a->Mbs;
2425   ba   = b->a;
2426   bi   = b->i;
2427   bj   = b->j;
2428 
2429   /* find ownerships */
2430   rowners_bs = A->rmap->range;
2431 
2432   /* each proc creates an array to be distributed */
2433   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2434   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
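  /* work[col] accumulates the largest |entry| this process contributes to each global column
     through its off-diagonal block; the owning processes fold the received maxima into v below */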
2435 
2436   /* row_max for B */
2437   if (rank != size-1){
2438     for (i=0; i<mbs; i++) {
2439       ncols = bi[1] - bi[0]; bi++;
2440       brow  = bs*i;
2441       for (j=0; j<ncols; j++){
2442         bcol = bs*(*bj);
2443         for (kcol=0; kcol<bs; kcol++){
2444           col = bcol + kcol;                 /* local col index */
2445           col += rowners_bs[rank+1];      /* global col index */
2446           for (krow=0; krow<bs; krow++){
2447             atmp = PetscAbsScalar(*ba); ba++;
2448             row = brow + krow;    /* local row index */
2449             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2450             if (work[col] < atmp) work[col] = atmp;
2451           }
2452         }
2453         bj++;
2454       }
2455     }
2456 
2457     /* send values to its owners */
2458     for (dest=rank+1; dest<size; dest++){
2459       svalues = work + rowners_bs[dest];
2460       count   = rowners_bs[dest+1]-rowners_bs[dest];
2461       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2462     }
2463   }
2464 
2465   /* receive values */
2466   if (rank){
2467     rvalues = work;
2468     count   = rowners_bs[rank+1]-rowners_bs[rank];
2469     for (source=0; source<rank; source++){
2470       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2471       /* process values */
2472       for (i=0; i<count; i++){
2473         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2474       }
2475     }
2476   }
2477 
2478   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2479   ierr = PetscFree(work);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSOR_MPISBAIJ"
2485 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2486 {
2487   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2488   PetscErrorCode ierr;
2489   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2490   PetscScalar    *x,*b,*ptr,*from;
2491   Vec            bb1;
2492 
2493   PetscFunctionBegin;
2494   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2495   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2496 
2497   if (flag == SOR_APPLY_UPPER) {
2498     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2499     PetscFunctionReturn(0);
2500   }
2501 
2502   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2503     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2504       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2505       its--;
2506     }
2507 
2508     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
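    /* each iteration assembles bb1 = bb - B*x_ghost - (B^T*x contributions received from
       other processes) with a single ADD-valued forward scatter of slvec0 into slvec1,
       then runs the symmetric sweep on the local diagonal block A */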
2509     while (its--){
2510 
2511       /* lower triangular part: slvec0b = - B^T*xx */
2512       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2513 
2514       /* copy xx into slvec0a */
2515       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2516       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2517       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2518       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2519 
2520       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2521 
2522       /* copy bb into slvec1a */
2523       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2524       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2525       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2526       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2527 
2528       /* set slvec1b = 0 */
2529       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2530 
2531       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2532       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2533       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2534       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2535 
2536       /* upper triangular part: bb1 = bb1 - B*x */
2537       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2538 
2539       /* local diagonal sweep */
2540       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2541     }
2542     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2543   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2544     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2545   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2546     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2547   } else if (flag & SOR_EISENSTAT) {
2548     Vec               xx1;
2549     PetscTruth        hasop;
2550     const PetscScalar *diag;
2551     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2552     PetscInt          i,n;
2553 
2554     if (!mat->xx1) {
2555       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2556       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2557     }
2558     xx1 = mat->xx1;
2559     bb1 = mat->bb1;
2560 
2561     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2562 
2563     if (!mat->diag) {
2564       /* this is wrong for same matrix with new nonzero values */
2565       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2566       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2567     }
2568     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2569 
2570     if (hasop) {
2571       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2572       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2573     } else {
2574       /*
2575           These two lines are replaced by code that may be a bit faster for a good compiler
2576       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2577       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2578       */
2579       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2580       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2581       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2582       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2583       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2584       if (omega == 1.0) {
2585 	for (i=0; i<n; i++) {
2586 	  sl[i] = b[i] - diag[i]*x[i];
2587 	}
2588         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2589       } else {
2590 	for (i=0; i<n; i++) {
2591 	  sl[i] = b[i] + scale*diag[i]*x[i];
2592 	}
2593         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2594       }
2595       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2596       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2597       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2598       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2599     }
2600 
2601     /* multiply off-diagonal portion of matrix */
2602     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2603     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2604     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2605     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2606     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2607     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2608     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2609     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2610     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2611     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2612 
2613     /* local sweep */
2614     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2615     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2616   } else {
2617     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2618   }
2619   PetscFunctionReturn(0);
2620 }
2621 
2622 #undef __FUNCT__
2623 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2624 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2625 {
2626   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2627   PetscErrorCode ierr;
2628   Vec            lvec1,bb1;
2629 
2630   PetscFunctionBegin;
2631   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2632   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2633 
2634   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2635     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2636       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2637       its--;
2638     }
2639 
2640     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2641     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2642     while (its--){
2643       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2644 
2645       /* lower diagonal part: bb1 = bb - B^T*xx */
2646       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2647       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2648 
2649       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2650       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2651       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2652 
2653       /* upper diagonal part: bb1 = bb1 - B*x */
2654       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2655       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2656 
2657       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2658 
2659       /* diagonal sweep */
2660       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2661     }
2662     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2663     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2664   } else {
2665     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670