/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6abefa4f446b2ea91915da4e25ea0defa83db48e) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

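/*
   Note on the two macros above (an explanatory sketch, not authoritative
   documentation): both bisect the sorted block-column list rp[0..nrow-1] of
   block row brow until fewer than four candidates remain, finish with a
   linear scan, and then either update the entry in place or shift the tail
   of the row upward to open a slot for the new block.  As a worked example
   with bs = 2: row = 5, col = 8 gives brow = 2, bcol = 4, (ridx,cidx) = (1,0),
   so the scalar lands at ap[bs2*_i + bs*cidx + ridx] = ap[4*_i + 1] once the
   block index _i of block column 4 is located.
*/
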
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j entered by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
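
/*
   Usage sketch (an illustration, not additional API): values reach
   MatSetValues_MPISBAIJ() through the generic MatSetValues() interface, and
   only the upper block triangle should be supplied, e.g.

     PetscInt    row = 0, col = 2;
     PetscScalar val = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries for rows owned by another process are stashed above and only
   communicated during assembly.
*/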

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

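/*
   Note on the Frobenius case below (explanatory, with an assumed notation):
   write the global matrix as M = D + B + B^T, where D collects the diagonal
   blocks (stored in baij->A, itself an SBAIJ matrix whose MatNorm() accounts
   for its symmetric storage) and B the strictly upper off-diagonal part
   (stored once, in baij->B).  Every entry of B also stands for its mirror
   image, so

     ||M||_F^2 = ||D||_F^2 + 2*||B||_F^2,

   which is exactly the sqrt(sum[0] + 2*sum[1]) computed after the
   MPI_Allreduce of the two local squared norms.
*/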
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags before MatSetValues() and restore the
       original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
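
/*
   Assembly sketch (an explanatory note, not additional API): after the user
   calls MatAssemblyBegin()/MatAssemblyEnd(), the scalar stash (mat->stash)
   and the block stash (mat->bstash) deliver the entries that were set for
   rows owned by other processes.  Consecutive stashed values sharing a row
   index are grouped so that each group costs a single MatSetValues call,
   e.g. the stream (row,col) = (5,1),(5,3),(7,2) becomes two calls: one for
   row 5 with two columns and one for row 7 with one column.
*/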

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

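/*
   Note on the symmetric multiply (an explanatory sketch of the kernels
   below): with only the upper block triangle stored, y = M*x is computed as

     y = A*x_local + B*x_ghost + B^T*x_local

   The B^T*x_local contributions destined for other processes are placed in
   slvec0b, the local x is copied into the head of slvec0, and one
   VecScatter on sMvctx both sends the transpose contributions and gathers
   the ghost values into slvec1b, so a single communication round replaces
   the two scatters a naive implementation would need.
*/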
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock a->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

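/*
   Row-retrieval sketch (an explanatory note): a global row is assembled by
   merging the diagonal-part row (from mat->A, whose block columns are local
   and contiguous) with the off-part row (from mat->B, whose block columns
   are translated to global indices through mat->garray).  Both sequences
   are already sorted, so the merge below first copies the B entries whose
   global block column precedes cstart, then all A entries, then the
   remaining B entries, yielding one row sorted by global column index.
*/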
996 #define __FUNCT__ "MatGetRow_MPISBAIJ"
997 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
998 {
999   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1000   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1001   PetscErrorCode ierr;
1002   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1003   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1004   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1005 
1006   PetscFunctionBegin;
1007   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1008   mat->getrowactive = PETSC_TRUE;
1009 
1010   if (!mat->rowvalues && (idx || v)) {
1011     /*
1012         allocate enough space to hold information from the longest row.
1013     */
1014     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1015     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1016     PetscInt     max = 1,mbs = mat->mbs,tmp;
1017     for (i=0; i<mbs; i++) {
1018       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1019       if (max < tmp) { max = tmp; }
1020     }
1021     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1022   }
1023 
1024   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1025   lrow = row - brstart;  /* local row index */
1026 
1027   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1028   if (!v)   {pvA = 0; pvB = 0;}
1029   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1030   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1031   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1032   nztot = nzA + nzB;
1033 
1034   cmap  = mat->garray;
1035   if (v  || idx) {
1036     if (nztot) {
1037       /* Sort by increasing column numbers, assuming A and B already sorted */
1038       PetscInt imark = -1;
1039       if (v) {
1040         *v = v_p = mat->rowvalues;
1041         for (i=0; i<nzB; i++) {
1042           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1043           else break;
1044         }
1045         imark = i;
1046         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1047         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1048       }
1049       if (idx) {
1050         *idx = idx_p = mat->rowindices;
1051         if (imark > -1) {
1052           for (i=0; i<imark; i++) {
1053             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1054           }
1055         } else {
1056           for (i=0; i<nzB; i++) {
1057             if (cmap[cworkB[i]/bs] < cstart)
1058               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1059             else break;
1060           }
1061           imark = i;
1062         }
1063         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1064         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1065       }
1066     } else {
1067       if (idx) *idx = 0;
1068       if (v)   *v   = 0;
1069     }
1070   }
1071   *nz = nztot;
1072   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1073   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1079 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1080 {
1081   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1082 
1083   PetscFunctionBegin;
1084   if (!baij->getrowactive) {
1085     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1086   }
1087   baij->getrowactive = PETSC_FALSE;
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_TRUE;
1100   PetscFunctionReturn(0);
1101 }
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1104 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1105 {
1106   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1107   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1108 
1109   PetscFunctionBegin;
1110   aA->getrow_utriangular = PETSC_FALSE;
1111   PetscFunctionReturn(0);
1112 }
1113 
#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

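/*
   Note (explanatory): a symmetric matrix equals its own transpose, so the
   transpose of an MPISBAIJ matrix is produced below by simply duplicating
   it, and an in-place request (reuse with *B == A) is a no-op.
*/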
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg)
      SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1380 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1381 {
1382   PetscErrorCode ierr;
1383   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1384   PetscBLASInt   bnz,one=1;
1385   Mat_SeqSBAIJ   *xa,*ya;
1386   Mat_SeqBAIJ    *xb,*yb;
1387 
1388   PetscFunctionBegin;
1389   if (str == SAME_NONZERO_PATTERN) {
1390     PetscScalar alpha = a;
1391     xa = (Mat_SeqSBAIJ *)xx->A->data;
1392     ya = (Mat_SeqSBAIJ *)yy->A->data;
1393     bnz = PetscBLASIntCast(xa->nz);
1394     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1395     xb = (Mat_SeqBAIJ *)xx->B->data;
1396     yb = (Mat_SeqBAIJ *)yy->B->data;
1397     bnz = PetscBLASIntCast(xb->nz);
1398     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1399   } else {
1400     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1401     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1402     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1409 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1410 {
1411   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1412   PetscInt        rbs,cbs;
1413   PetscErrorCode  ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1417   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1418   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1419   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1420   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with row block size %D of SBAIJ matrix",bs,rbs);
1421   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %D incompatible with column block size %D of SBAIJ matrix",bs,cbs);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1427 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1428 {
1429   PetscErrorCode ierr;
1430   PetscInt       i;
1431   PetscTruth     flg;
1432 
1433   PetscFunctionBegin;
1434   for (i=0; i<n; i++) {
1435     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1436     if (!flg) {
1437       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1438     }
1439   }
1440   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 
1445 /* -------------------------------------------------------------------*/
1446 static struct _MatOps MatOps_Values = {
1447        MatSetValues_MPISBAIJ,
1448        MatGetRow_MPISBAIJ,
1449        MatRestoreRow_MPISBAIJ,
1450        MatMult_MPISBAIJ,
1451 /* 4*/ MatMultAdd_MPISBAIJ,
1452        MatMult_MPISBAIJ,       /* transpose versions are the same as non-transpose because the matrix is symmetric */
1453        MatMultAdd_MPISBAIJ,
1454        0,
1455        0,
1456        0,
1457 /*10*/ 0,
1458        0,
1459        0,
1460        MatSOR_MPISBAIJ,
1461        MatTranspose_MPISBAIJ,
1462 /*15*/ MatGetInfo_MPISBAIJ,
1463        MatEqual_MPISBAIJ,
1464        MatGetDiagonal_MPISBAIJ,
1465        MatDiagonalScale_MPISBAIJ,
1466        MatNorm_MPISBAIJ,
1467 /*20*/ MatAssemblyBegin_MPISBAIJ,
1468        MatAssemblyEnd_MPISBAIJ,
1469        MatSetOption_MPISBAIJ,
1470        MatZeroEntries_MPISBAIJ,
1471 /*24*/ 0,
1472        0,
1473        0,
1474        0,
1475        0,
1476 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*34*/ MatDuplicate_MPISBAIJ,
1482        0,
1483        0,
1484        0,
1485        0,
1486 /*39*/ MatAXPY_MPISBAIJ,
1487        MatGetSubMatrices_MPISBAIJ,
1488        MatIncreaseOverlap_MPISBAIJ,
1489        MatGetValues_MPISBAIJ,
1490        MatCopy_MPISBAIJ,
1491 /*44*/ 0,
1492        MatScale_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496 /*49*/ MatSetBlockSize_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*54*/ 0,
1502        0,
1503        MatSetUnfactored_MPISBAIJ,
1504        0,
1505        MatSetValuesBlocked_MPISBAIJ,
1506 /*59*/ 0,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*64*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*74*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*79*/ 0,
1527        0,
1528        0,
1529        0,
1530        MatLoad_MPISBAIJ,
1531 /*84*/ 0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*89*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*94*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*99*/ 0,
1547        0,
1548        0,
1549        0,
1550        0,
1551 /*104*/0,
1552        MatRealPart_MPISBAIJ,
1553        MatImaginaryPart_MPISBAIJ,
1554        MatGetRowUpperTriangular_MPISBAIJ,
1555        MatRestoreRowUpperTriangular_MPISBAIJ
1556 };
1557 
1558 
1559 EXTERN_C_BEGIN
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1562 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1563 {
1564   PetscFunctionBegin;
1565   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1566   *iscopy = PETSC_FALSE;
1567   PetscFunctionReturn(0);
1568 }
1569 EXTERN_C_END
1570 
1571 EXTERN_C_BEGIN
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1574 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1575 {
1576   Mat_MPISBAIJ   *b;
1577   PetscErrorCode ierr;
1578   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1579 
1580   PetscFunctionBegin;
1581   if (bs < 0){
1582     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1583       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1584     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1585     bs   = PetscAbs(bs);
1586   }
1587   if ((d_nnz || o_nnz) && newbs != bs) {
1588     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1589   }
1590   bs = newbs;
1591 
1592   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1593   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1594   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1595   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1596 
1597   B->rmap->bs = B->cmap->bs = bs;
1598   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1599   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1600 
1601   if (d_nnz) {
1602     for (i=0; i<B->rmap->n/bs; i++) {
1603       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1604     }
1605   }
1606   if (o_nnz) {
1607     for (i=0; i<B->rmap->n/bs; i++) {
1608       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1609     }
1610   }
1611 
1612   b   = (Mat_MPISBAIJ*)B->data;
1613   mbs = B->rmap->n/bs;
1614   Mbs = B->rmap->N/bs;
1615   if (mbs*bs != B->rmap->n) {
1616     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1617   }
1618 
1619   B->rmap->bs  = bs;
1620   b->bs2 = bs*bs;
1621   b->mbs = mbs;
1622   b->nbs = mbs;
1623   b->Mbs = Mbs;
1624   b->Nbs = Mbs;
1625 
1626   for (i=0; i<=b->size; i++) {
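  /* convert the point-row ownership ranges into block-row ranges */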
1627     b->rangebs[i] = B->rmap->range[i]/bs;
1628   }
1629   b->rstartbs = B->rmap->rstart/bs;
1630   b->rendbs   = B->rmap->rend/bs;
1631 
1632   b->cstartbs = B->cmap->rstart/bs;
1633   b->cendbs   = B->cmap->rend/bs;
1634 
1635   if (!B->preallocated) {
1636     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1637     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1638     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1639     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1640     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1641     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1642     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1643     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1644     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1645   }
1646 
1647   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1648   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1649   B->preallocated = PETSC_TRUE;
1650   PetscFunctionReturn(0);
1651 }
1652 EXTERN_C_END
1653 
1654 EXTERN_C_BEGIN
1655 #if defined(PETSC_HAVE_MUMPS)
1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1657 #endif
1658 #if defined(PETSC_HAVE_SPOOLES)
1659 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1660 #endif
1661 #if defined(PETSC_HAVE_PASTIX)
1662 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1663 #endif
1664 EXTERN_C_END
1665 
1666 /*MC
1667    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1668    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1669 
1670    Options Database Keys:
1671 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1672 
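   A minimal usage sketch (assuming A was created with MatCreate() and sized with MatSetSizes();
   bs, d_nz and o_nz are placeholders chosen by the caller):
.vb
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
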
1673   Level: beginner
1674 
1675 .seealso: MatCreateMPISBAIJ
1676 M*/
1677 
1678 EXTERN_C_BEGIN
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatCreate_MPISBAIJ"
1681 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1682 {
1683   Mat_MPISBAIJ   *b;
1684   PetscErrorCode ierr;
1685   PetscTruth     flg;
1686 
1687   PetscFunctionBegin;
1688 
1689   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1690   B->data = (void*)b;
1691   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1692 
1693   B->ops->destroy    = MatDestroy_MPISBAIJ;
1694   B->ops->view       = MatView_MPISBAIJ;
1695   B->mapping         = 0;
1696   B->assembled       = PETSC_FALSE;
1697 
1698   B->insertmode = NOT_SET_VALUES;
1699   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1700   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1701 
1702   /* build local table of row and column ownerships */
1703   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1704 
1705   /* build cache for off array entries formed */
1706   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1707   b->donotstash  = PETSC_FALSE;
1708   b->colmap      = PETSC_NULL;
1709   b->garray      = PETSC_NULL;
1710   b->roworiented = PETSC_TRUE;
1711 
1712   /* stuff used in block assembly */
1713   b->barray       = 0;
1714 
1715   /* stuff used for matrix vector multiply */
1716   b->lvec         = 0;
1717   b->Mvctx        = 0;
1718   b->slvec0       = 0;
1719   b->slvec0b      = 0;
1720   b->slvec1       = 0;
1721   b->slvec1a      = 0;
1722   b->slvec1b      = 0;
1723   b->sMvctx       = 0;
1724 
1725   /* stuff for MatGetRow() */
1726   b->rowindices   = 0;
1727   b->rowvalues    = 0;
1728   b->getrowactive = PETSC_FALSE;
1729 
1730   /* hash table stuff */
1731   b->ht           = 0;
1732   b->hd           = 0;
1733   b->ht_size      = 0;
1734   b->ht_flag      = PETSC_FALSE;
1735   b->ht_fact      = 0;
1736   b->ht_total_ct  = 0;
1737   b->ht_insert_ct = 0;
1738 
1739   b->in_loc       = 0;
1740   b->v_loc        = 0;
1741   b->n_loc        = 0;
1742   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1743     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1744     if (flg) {
1745       PetscReal fact = 1.39;
1746       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1747       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1748       if (fact <= 1.0) fact = 1.39;
1749       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1750       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1751     }
1752   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1753 
1754 #if defined(PETSC_HAVE_PASTIX)
1755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1756 					   "MatGetFactor_mpisbaij_pastix",
1757 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1758 #endif
1759 #if defined(PETSC_HAVE_MUMPS)
1760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1761                                      "MatGetFactor_mpisbaij_mumps",
1762                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1763 #endif
1764 #if defined(PETSC_HAVE_SPOOLES)
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1766                                      "MatGetFactor_mpisbaij_spooles",
1767                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1768 #endif
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1770                                      "MatStoreValues_MPISBAIJ",
1771                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1773                                      "MatRetrieveValues_MPISBAIJ",
1774                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1776                                      "MatGetDiagonalBlock_MPISBAIJ",
1777                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1779                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1780                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1781   B->symmetric                  = PETSC_TRUE;
1782   B->structurally_symmetric     = PETSC_TRUE;
1783   B->symmetric_set              = PETSC_TRUE;
1784   B->structurally_symmetric_set = PETSC_TRUE;
1785   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 EXTERN_C_END
1789 
1790 /*MC
1791    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1792 
1793    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1794    and MATMPISBAIJ otherwise.
1795 
1796    Options Database Keys:
1797 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1798 
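   A sketch of typical use (assuming A was created with MatCreate() and sized with MatSetSizes()):
.vb
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* select the type at run time with -mat_type sbaij */
.ve
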
1799   Level: beginner
1800 
1801 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1802 M*/
1803 
1804 EXTERN_C_BEGIN
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatCreate_SBAIJ"
1807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1808 {
1809   PetscErrorCode ierr;
1810   PetscMPIInt    size;
1811 
1812   PetscFunctionBegin;
1813   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1825 /*@C
1826    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on Mat
1832 
1833    Input Parameters:
1834 +  A - the matrix
1835 .  bs   - size of block
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
1838 .  d_nnz - array containing the number of block nonzeros in the various block rows
1839            in the upper triangular and diagonal part of the diagonal portion of the
1840            local submatrix (possibly different for each block row) or PETSC_NULL.
1841            You must leave room for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
1844 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848 
1849    Options Database Keys:
1850 .   -mat_no_unroll - uses code that does not unroll the loops in the
1851                      block calculations (much slower)
1852 .   -mat_block_size - size of the blocks to use
1853 
1854    Notes:
1855 
1856    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1857    then it must be used on all processors that share the object for that argument.
1858 
1859    If the *_nnz parameter is given then the *_nz parameter is ignored
1860 
1861    Storage Information:
1862    For a square global matrix we define each processor's diagonal portion
1863    to be its local rows and the corresponding columns (a square submatrix);
1864    each processor's off-diagonal portion encompasses the remainder of the
1865    local matrix (a rectangular submatrix).
1866 
1867    The user can specify preallocated storage for the diagonal part of
1868    the local submatrix with either d_nz or d_nnz (not both).  Set
1869    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1870    memory allocation.  Likewise, specify preallocated storage for the
1871    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1872 
1873    You can call MatGetInfo() to get information on how effective the preallocation was;
1874    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1875    You can also run with the option -info and look for messages with the string
1876    malloc in them to see if additional memory allocation was needed.
1877 
1878    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1879    the figure below we depict these three local rows and all columns (0-11).
1880 
1881 .vb
1882            0 1 2 3 4 5 6 7 8 9 10 11
1883           -------------------
1884    row 3  |  o o o d d d o o o o o o
1885    row 4  |  o o o d d d o o o o o o
1886    row 5  |  o o o d d d o o o o o o
1887           -------------------
1888 .ve
1889 
1890    Thus, any entries in the d locations are stored in the d (diagonal)
1891    submatrix, and any entries in the o locations are stored in the
1892    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1893    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1894 
1895    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1896    plus the diagonal part of the d matrix,
1897    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1898    In general, for PDE problems in which most nonzeros are near the diagonal,
1899    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1900    or you will get TERRIBLE performance; see the users' manual chapter on
1901    matrices.
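
   A usage sketch (comm, bs, and the sizes m, n, M, N are placeholders chosen by the caller;
   here 3 blocks per block row are expected in the upper triangular plus diagonal part of d,
   and 1 block per block row in o):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve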
1902 
1903    Level: intermediate
1904 
1905 .keywords: matrix, block, aij, compressed row, sparse, parallel
1906 
1907 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1908 @*/
1909 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1910 {
1911   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1912 
1913   PetscFunctionBegin;
1914   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1915   if (f) {
1916     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatCreateMPISBAIJ"
1923 /*@C
1924    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1925    (block compressed row).  For good matrix assembly performance
1926    the user should preallocate the matrix storage by setting the parameters
1927    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1928    performance can be increased by more than a factor of 50.
1929 
1930    Collective on MPI_Comm
1931 
1932    Input Parameters:
1933 +  comm - MPI communicator
1934 .  bs   - size of block
1935 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1936            This value should be the same as the local size used in creating the
1937            y vector for the matrix-vector product y = Ax.
1938 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1939            This value should be the same as the local size used in creating the
1940            x vector for the matrix-vector product y = Ax.
1941 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1942 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1943 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1944            submatrix  (same for all local rows)
1945 .  d_nnz - array containing the number of block nonzeros in the various block rows
1946            in the upper triangular and diagonal part of the diagonal portion of the
1947            local submatrix (possibly different for each block row) or PETSC_NULL.
1948            You must leave room for the diagonal entry even if it is zero.
1949 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1950            submatrix (same for all local rows).
1951 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1952            off-diagonal portion of the local submatrix (possibly different for
1953            each block row) or PETSC_NULL.
1954 
1955    Output Parameter:
1956 .  A - the matrix
1957 
1958    Options Database Keys:
1959 .   -mat_no_unroll - uses code that does not unroll the loops in the
1960                      block calculations (much slower)
1961 .   -mat_block_size - size of the blocks to use
1962 .   -mat_mpi - use the parallel matrix data structures even on one processor
1963                (defaults to using SeqSBAIJ format on one processor)
1964 
1965    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1966    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1967    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1968 
1969    Notes:
1970    The number of rows and columns must be divisible by blocksize.
1971    This matrix type does not support complex Hermitian operations.
1972 
1973    The user MUST specify either the local or global matrix dimensions
1974    (possibly both).
1975 
1976    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1977    then it must be used on all processors that share the object for that argument.
1978 
1979    If the *_nnz parameter is given then the *_nz parameter is ignored
1980 
1981    Storage Information:
1982    For a square global matrix we define each processor's diagonal portion
1983    to be its local rows and the corresponding columns (a square submatrix);
1984    each processor's off-diagonal portion encompasses the remainder of the
1985    local matrix (a rectangular submatrix).
1986 
1987    The user can specify preallocated storage for the diagonal part of
1988    the local submatrix with either d_nz or d_nnz (not both).  Set
1989    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1990    memory allocation.  Likewise, specify preallocated storage for the
1991    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1992 
1993    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1994    the figure below we depict these three local rows and all columns (0-11).
1995 
1996 .vb
1997            0 1 2 3 4 5 6 7 8 9 10 11
1998           -------------------
1999    row 3  |  o o o d d d o o o o o o
2000    row 4  |  o o o d d d o o o o o o
2001    row 5  |  o o o d d d o o o o o o
2002           -------------------
2003 .ve
2004 
2005    Thus, any entries in the d locations are stored in the d (diagonal)
2006    submatrix, and any entries in the o locations are stored in the
2007    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2008    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2009 
2010    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2011    plus the diagonal part of the d matrix,
2012    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2013    In general, for PDE problems in which most nonzeros are near the diagonal,
2014    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2015    or you will get TERRIBLE performance; see the users' manual chapter on
2016    matrices.
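
   A usage sketch (comm, bs, m and M are placeholders; the same sizes are passed for rows
   and columns since the matrix must be square):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,bs,m,m,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve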
2017 
2018    Level: intermediate
2019 
2020 .keywords: matrix, block, aij, compressed row, sparse, parallel
2021 
2022 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2023 @*/
2024 
2025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2026 {
2027   PetscErrorCode ierr;
2028   PetscMPIInt    size;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2032   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2034   if (size > 1) {
2035     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2036     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2037   } else {
2038     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2039     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2047 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2048 {
2049   Mat            mat;
2050   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2051   PetscErrorCode ierr;
2052   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2053   PetscScalar    *array;
2054 
2055   PetscFunctionBegin;
2056   *newmat       = 0;
2057   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2058   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2059   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2060   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2061   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2062   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2063 
2064   mat->factor       = matin->factor;
2065   mat->preallocated = PETSC_TRUE;
2066   mat->assembled    = PETSC_TRUE;
2067   mat->insertmode   = NOT_SET_VALUES;
2068 
2069   a = (Mat_MPISBAIJ*)mat->data;
2070   a->bs2   = oldmat->bs2;
2071   a->mbs   = oldmat->mbs;
2072   a->nbs   = oldmat->nbs;
2073   a->Mbs   = oldmat->Mbs;
2074   a->Nbs   = oldmat->Nbs;
2075 
2076 
2077   a->size         = oldmat->size;
2078   a->rank         = oldmat->rank;
2079   a->donotstash   = oldmat->donotstash;
2080   a->roworiented  = oldmat->roworiented;
2081   a->rowindices   = 0;
2082   a->rowvalues    = 0;
2083   a->getrowactive = PETSC_FALSE;
2084   a->barray       = 0;
2085   a->rstartbs    = oldmat->rstartbs;
2086   a->rendbs      = oldmat->rendbs;
2087   a->cstartbs    = oldmat->cstartbs;
2088   a->cendbs      = oldmat->cendbs;
2089 
2090   /* hash table stuff */
2091   a->ht           = 0;
2092   a->hd           = 0;
2093   a->ht_size      = 0;
2094   a->ht_flag      = oldmat->ht_flag;
2095   a->ht_fact      = oldmat->ht_fact;
2096   a->ht_total_ct  = 0;
2097   a->ht_insert_ct = 0;
2098 
2099   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2100   if (oldmat->colmap) {
2101 #if defined (PETSC_USE_CTABLE)
2102     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2103 #else
2104     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2105     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2106     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2107 #endif
2108   } else a->colmap = 0;
2109 
2110   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2111     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2112     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2113     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2114   } else a->garray = 0;
2115 
2116   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2117   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2119   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2121 
2122   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2124   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2126 
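  /* slvec1 holds the local part (first bs*mbs entries) followed by the ghost part;
     create sequential work vectors sharing those two segments (likewise slvec0b for slvec0) */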
2127   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2128   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2132   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2133   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2134   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2140 
2141   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2142   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2143   a->sMvctx = oldmat->sMvctx;
2144   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2145 
2146   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2148   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2150   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2151   *newmat = mat;
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatLoad_MPISBAIJ"
2157 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2158 {
2159   Mat            A;
2160   PetscErrorCode ierr;
2161   PetscInt       i,nz,j,rstart,rend;
2162   PetscScalar    *vals,*buf;
2163   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2164   MPI_Status     status;
2165   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2166   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2167   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2168   PetscInt       bs=1,Mbs,mbs,extra_rows;
2169   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2170   PetscInt       dcount,kmax,k,nzcount,tmp;
2171   int            fd;
2172 
2173   PetscFunctionBegin;
2174   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2175     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2176   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2177 
2178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2179   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2180   if (!rank) {
2181     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2182     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2183     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2184     if (header[3] < 0) {
2185       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2186     }
2187   }
2188 
2189   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2190   M = header[1]; N = header[2];
2191 
2192   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2193 
2194   /*
2195      This code adds extra rows to make sure the number of rows is
2196      divisible by the blocksize
2197   */
2198   Mbs        = M/bs;
2199   extra_rows = bs - M + bs*(Mbs);
2200   if (extra_rows == bs) extra_rows = 0;
2201   else                  Mbs++;
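  /* the padded rows are later given a single unit entry on the diagonal */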
2202   if (extra_rows && !rank) {
2203     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2204   }
2205 
2206   /* determine ownership of all rows */
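  /* each rank gets Mbs/size block rows; the first Mbs%size ranks receive one extra */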
2207   mbs        = Mbs/size + ((Mbs % size) > rank);
2208   m          = mbs*bs;
2209   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2210   mmbs       = PetscMPIIntCast(mbs);
2211   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2212   rowners[0] = 0;
2213   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2214   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2215   rstart = rowners[rank];
2216   rend   = rowners[rank+1];
2217 
2218   /* distribute row lengths to all processors */
2219   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2220   if (!rank) {
2221     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2222     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2223     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2224     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2225     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2226     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2227     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2228   } else {
2229     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230   }
2231 
2232   if (!rank) {   /* procs[0] */
2233     /* calculate the number of nonzeros on each processor */
2234     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2235     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2236     for (i=0; i<size; i++) {
2237       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2238         procsnz[i] += rowlengths[j];
2239       }
2240     }
2241     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2242 
2243     /* determine max buffer needed and allocate it */
2244     maxnz = 0;
2245     for (i=0; i<size; i++) {
2246       maxnz = PetscMax(maxnz,procsnz[i]);
2247     }
2248     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2249 
2250     /* read in my part of the matrix column indices  */
2251     nz     = procsnz[0];
2252     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2253     mycols = ibuf;
2254     if (size == 1)  nz -= extra_rows;
2255     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2256     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2257 
2258     /* read in every other proc's column indices (except the last proc's) and ship them off */
2259     for (i=1; i<size-1; i++) {
2260       nz   = procsnz[i];
2261       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2262       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2263     }
2264     /* read in the stuff for the last proc */
2265     if (size != 1) {
2266       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2267       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2268       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2269       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2270     }
2271     ierr = PetscFree(cols);CHKERRQ(ierr);
2272   } else {  /* procs[i], i>0 */
2273     /* determine buffer space needed for message */
2274     nz = 0;
2275     for (i=0; i<m; i++) {
2276       nz += locrowlens[i];
2277     }
2278     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2279     mycols = ibuf;
2280     /* receive message of column indices*/
2281     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2282     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2283     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of column indices received does not match the expected nonzero count");
2284   }
2285 
2286   /* loop over local rows, determining number of off diagonal entries */
2287   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2288   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2289   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2290   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2291   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2292   rowcount = 0;
2293   nzcount  = 0;
2294   for (i=0; i<mbs; i++) {
2295     dcount  = 0;
2296     odcount = 0;
2297     for (j=0; j<bs; j++) {
2298       kmax = locrowlens[rowcount];
2299       for (k=0; k<kmax; k++) {
2300         tmp = mycols[nzcount++]/bs; /* block col. index */
2301         if (!mask[tmp]) {
2302           mask[tmp] = 1;
2303           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2304           else masked1[dcount++] = tmp; /* entry in diag portion */
2305         }
2306       }
2307       rowcount++;
2308     }
2309 
2310     dlens[i]  = dcount;  /* d_nzz[i] */
2311     odlens[i] = odcount; /* o_nzz[i] */
2312 
2313     /* zero out the mask elements we set */
2314     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2315     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2316   }
2317 
2318   /* create our matrix */
2319   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2320   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2321   ierr = MatSetType(A,type);CHKERRQ(ierr);
2322   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2323   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2324 
2325   if (!rank) {
2326     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2327     /* read in my part of the matrix numerical values  */
2328     nz = procsnz[0];
2329     vals = buf;
2330     mycols = ibuf;
2331     if (size == 1)  nz -= extra_rows;
2332     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2333     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2334 
2335     /* insert into matrix */
2336     jj      = rstart*bs;
2337     for (i=0; i<m; i++) {
2338       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2339       mycols += locrowlens[i];
2340       vals   += locrowlens[i];
2341       jj++;
2342     }
2343 
2344     /* read in other processors (except the last one) and ship out */
2345     for (i=1; i<size-1; i++) {
2346       nz   = procsnz[i];
2347       vals = buf;
2348       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2349       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2350     }
2351     /* the last proc */
2352     if (size != 1){
2353       nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2354       vals = buf;
2355       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2356       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2357       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2358     }
2359     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2360 
2361   } else {
2362     /* receive numeric values */
2363     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2364 
2365     /* receive message of values*/
2366     vals   = buf;
2367     mycols = ibuf;
2368     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2369     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2370     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Number of values received does not match the expected nonzero count");
2371 
2372     /* insert into matrix */
2373     jj      = rstart*bs;
2374     for (i=0; i<m; i++) {
2375       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2376       mycols += locrowlens[i];
2377       vals   += locrowlens[i];
2378       jj++;
2379     }
2380   }
2381 
2382   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2383   ierr = PetscFree(buf);CHKERRQ(ierr);
2384   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2385   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2386   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2387   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2388   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2390   *newmat = A;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2396 /*XXXXX@
2397    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2398 
2399    Input Parameters:
2400 .  mat  - the matrix
2401 .  fact - factor
2402 
2403    Collective on Mat
2404 
2405    Level: advanced
2406 
2407   Notes:
2408    This can also be set by the command line option: -mat_use_hash_table fact
2409 
2410 .keywords: matrix, hashtable, factor, HT
2411 
2412 .seealso: MatSetOption()
2413 @XXXXX*/
2414 
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2418 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2419 {
2420   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2421   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2422   PetscReal      atmp;
2423   PetscReal      *work,*svalues,*rvalues;
2424   PetscErrorCode ierr;
2425   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2426   PetscMPIInt    rank,size;
2427   PetscInt       *rowners_bs,dest,count,source;
2428   PetscScalar    *va;
2429   MatScalar      *ba;
2430   MPI_Status     stat;
2431 
2432   PetscFunctionBegin;
2433   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2434   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2435   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
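  /* only the upper triangle is stored: the call above handles the diagonal block, while
     each entry of B must also contribute to the row owned by its global column; those
     column maxima are accumulated in work[] and shipped to the owning (higher) ranks */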
2436 
2437   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2438   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2439 
2440   bs   = A->rmap->bs;
2441   mbs  = a->mbs;
2442   Mbs  = a->Mbs;
2443   ba   = b->a;
2444   bi   = b->i;
2445   bj   = b->j;
2446 
2447   /* find ownerships */
2448   rowners_bs = A->rmap->range;
2449 
2450   /* each proc creates an array to be distributed */
2451   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2452   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2453 
2454   /* row_max for B */
2455   if (rank != size-1){
2456     for (i=0; i<mbs; i++) {
2457       ncols = bi[1] - bi[0]; bi++;
2458       brow  = bs*i;
2459       for (j=0; j<ncols; j++){
2460         bcol = bs*(*bj);
2461         for (kcol=0; kcol<bs; kcol++){
2462           col = bcol + kcol;                 /* local col index */
2463           col += rowners_bs[rank+1];      /* global col index */
2464           for (krow=0; krow<bs; krow++){
2465             atmp = PetscAbsScalar(*ba); ba++;
2466             row = brow + krow;    /* local row index */
2467             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2468             if (work[col] < atmp) work[col] = atmp;
2469           }
2470         }
2471         bj++;
2472       }
2473     }
2474 
2475     /* send values to its owners */
2476     for (dest=rank+1; dest<size; dest++){
2477       svalues = work + rowners_bs[dest];
2478       count   = rowners_bs[dest+1]-rowners_bs[dest];
2479       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2480     }
2481   }
2482 
2483   /* receive values */
2484   if (rank){
2485     rvalues = work;
2486     count   = rowners_bs[rank+1]-rowners_bs[rank];
2487     for (source=0; source<rank; source++){
2488       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2489       /* process values */
2490       for (i=0; i<count; i++){
2491         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2492       }
2493     }
2494   }
2495 
2496   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2497   ierr = PetscFree(work);CHKERRQ(ierr);
2498   PetscFunctionReturn(0);
2499 }
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatSOR_MPISBAIJ"
2503 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2504 {
2505   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2506   PetscErrorCode ierr;
2507   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2508   PetscScalar    *x,*b,*ptr,*from;
2509   Vec            bb1;
2510 
2511   PetscFunctionBegin;
2512   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2513   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2514 
2515   if (flag == SOR_APPLY_UPPER) {
2516     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2517     PetscFunctionReturn(0);
2518   }
2519 
2520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2521     if (flag & SOR_ZERO_INITIAL_GUESS) {
2522       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525 
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2527     while (its--){
2528 
2529       /* lower triangular part: slvec0b = - B^T*xx */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2531 
2532       /* copy xx into slvec0a */
2533       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* both arrays are PetscScalar */
2536       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537 
2538       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2539 
2540       /* copy bb into slvec1a */
2541       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545 
2546       /* set slvec1b = 0 */
2547       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2548 
2549       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2551       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2552       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2553 
2554       /* upper triangular part: bb1 = bb1 - B*x */
2555       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2556 
2557       /* local diagonal sweep */
2558       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2561   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2563   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2564     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2565   } else if (flag & SOR_EISENSTAT) {
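    /* Eisenstat's trick: combine the forward/backward triangular sweeps with diagonal
       scaling so that SSOR preconditioning avoids a separate full matrix-vector product */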
2566     Vec               xx1;
2567     PetscTruth        hasop;
2568     const PetscScalar *diag;
2569     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2570     PetscInt          i,n;
2571 
2572     if (!mat->xx1) {
2573       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2574       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2575     }
2576     xx1 = mat->xx1;
2577     bb1 = mat->bb1;
2578 
2579     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2580 
2581     if (!mat->diag) {
2582       /* this is wrong for same matrix with new nonzero values */
2583       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2584       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2585     }
2586     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2587 
2588     if (hasop) {
2589       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2590       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2591     } else {
2592       /*
2593           These two lines are replaced by code that may be a bit faster for a good compiler
2594       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2595       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2596       */
2597       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2598       ierr = VecGetArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2599       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2600       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2601       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2602       if (omega == 1.0) {
2603         for (i=0; i<n; i++) {
2604           sl[i] = b[i] - diag[i]*x[i];
2605         }
2606         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2607       } else {
2608         for (i=0; i<n; i++) {
2609           sl[i] = b[i] + scale*diag[i]*x[i];
2610         }
2611         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2612       }
2613       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2614       ierr = VecRestoreArray(mat->diag,(PetscScalar**)&diag);CHKERRQ(ierr);
2615       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2616       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2617     }
2618 
2619     /* multiply off-diagonal portion of matrix */
2620     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2621     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2622     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2623     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2624     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2625     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2626     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2627     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2628     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2629     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2630 
2631     /* local sweep */
2632     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2633     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2634   } else {
2635     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2636   }
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 #undef __FUNCT__
2641 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2642 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2643 {
2644   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2645   PetscErrorCode ierr;
2646   Vec            lvec1,bb1;
2647 
2648   PetscFunctionBegin;
2649   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2650   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2651 
2652   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2653     if (flag & SOR_ZERO_INITIAL_GUESS) {
2654       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2655       its--;
2656     }
2657 
2658     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2659     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2660     while (its--){
2661       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2662 
2663       /* lower diagonal part: bb1 = bb - B^T*xx */
2664       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2665       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2666 
2667       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2668       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2669       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2670 
2671       /* upper diagonal part: bb1 = bb1 - B*x */
2672       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2673       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2674 
2675       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2676 
2677       /* diagonal sweep */
2678       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2679     }
2680     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2681     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2682   } else {
2683     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2684   }
2685   PetscFunctionReturn(0);
2686 }
2687 
2688