#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
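
/*
   Illustrative sketch (not compiled): MatStoreValues()/MatRetrieveValues(),
   dispatched to the two routines above, let an application snapshot the
   numerical values of an assembled matrix and restore them later, e.g.
   inside a nonlinear solve that reuses the nonzero pattern.  The function
   name below is hypothetical.
*/
#if 0
static PetscErrorCode SaveAndRestoreExample(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix must be assembled and keep its nonzero pattern */
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);     /* snapshot the values of the A and B parts */
  /* ... overwrite the matrix values here ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  /* restore the snapshot */
  PetscFunctionReturn(0);
}
#endif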

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

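/*
   A minimal sketch (not compiled) of the lookup strategy the two macros
   above use on a sorted block-column index array: a bisection loop narrows
   the window to at most a few entries, and a short linear scan finishes the
   search and yields the insertion point.  The names here are hypothetical.
*/
#if 0
static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {            /* bisect until the window is tiny */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {        /* linear scan over the small window */
    if (rp[i] >= bcol) break;       /* found bcol, or its insertion point */
  }
  return i;                         /* caller checks whether rp[i] == bcol */
}
#endif
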
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j supplied by the user is ignored (or raises an error,
   depending on the MAT_IGNORE_LOWER_TRIANGULAR option); see the sketch after
   this function.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqSBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
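
/*
   Illustrative usage sketch (not compiled): with MPISBAIJ only the upper
   block triangle is stored, so a caller either supplies entries with
   i <= j, or asks the matrix to silently drop the lower ones.  The index and
   value choices below are hypothetical.
*/
#if 0
static PetscErrorCode UpperTriangularInsertExample(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       i = 3,j = 7;       /* i <= j: lands in the stored triangle */
  PetscScalar    val = 2.5;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
  /* make entries below the diagonal a silent no-op instead of an error */
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&j,1,&i,&val,INSERT_VALUES);CHKERRQ(ierr); /* ignored */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif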

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
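
/*
   Illustrative sketch (not compiled): MatSetValuesBlocked() takes whole
   bs x bs blocks addressed by block indices.  With the default row-oriented
   layout, v holds the m*bs x n*bs submatrix row by row.  This hypothetical
   example assumes the matrix was created with block size 2.
*/
#if 0
static PetscErrorCode BlockInsertExample(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       brow = 0,bcol = 2;            /* block row <= block column */
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0}; /* one 2x2 block, row-major */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif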

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of the Frobenius norm of A */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of the Frobenius norm of B */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      /* B holds only one triangle of the off-diagonal entries, so its
         contribution is counted twice to account for the unstored mirrors */
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
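
/*
   Note on the NORM_INFINITY/NORM_1 branch above: for a symmetric matrix the
   1-norm (max column sum) equals the infinity-norm (max row sum), which is
   why both cases share one code path.  Each stored entry a_ij contributes
   |a_ij| to the sum of column j and, standing in for its unstored mirror
   a_ji, to the sum of column i as well.
*/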

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
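
/*
   Illustrative sketch (not compiled): the usual assembly cycle that drives
   MatAssemblyBegin_MPISBAIJ()/MatAssemblyEnd_MPISBAIJ() above.  Any process
   may set any entry; off-process entries travel through the stashes started
   in MatAssemblyBegin() and are drained in MatAssemblyEnd().
*/
#if 0
static PetscErrorCode AssemblyCycleExample(Mat mat,PetscInt row,PetscInt col,PetscScalar val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr); /* may be off-process */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        /* starts stash communication */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          /* applies stashed values, assembles A and B */
  PetscFunctionReturn(0);
}
#endif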

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
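
/*
   How the two multiply routines above avoid a second communication round:
   only one triangle of the off-diagonal block B is stored, so slvec0 is
   packed locally as [x; B^T x].  A single scatter-add into
   slvec1 = [slvec1a; slvec1b] then both (i) deposits the B^T x products on
   the processes that own those rows (the subdiagonal contribution, added to
   the A*x already sitting in slvec1a) and (ii) gathers the ghost values of
   x needed locally into slvec1b.  The closing
       yy = B*slvec1b + slvec1a
   adds the superdiagonal contribution.
*/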

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
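
/*
   Illustrative sketch (not compiled): because only the upper triangle is
   stored, MatGetRow() on an SBAIJ matrix must be bracketed by
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular(), which toggle
   the flag manipulated above.  The row argument is assumed locally owned.
*/
#if 0
static PetscErrorCode InspectRowExample(Mat mat,PetscInt row)
{
  PetscErrorCode    ierr;
  PetscInt          nz;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  /* ... examine the nz entries of the locally owned row ... */
  ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif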

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
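
/*
   Illustrative sketch (not compiled): querying the combined statistics that
   MatGetInfo_MPISBAIJ() assembles from the A and B parts.
*/
#if 0
static PetscErrorCode ReportNonzerosExample(Mat mat)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(((PetscObject)mat)->comm,"nonzeros used %G, allocated %G\n",
                     info.nz_used,info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif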

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is simply a copy of itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  /* only equal left and right scaling (L A L) preserves the symmetric storage */
  if (ll != rr){
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1367 {
1368   PetscErrorCode ierr;
1369   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1370   PetscBLASInt   bnz,one=1;
1371   Mat_SeqSBAIJ   *xa,*ya;
1372   Mat_SeqBAIJ    *xb,*yb;
1373 
1374   PetscFunctionBegin;
1375   if (str == SAME_NONZERO_PATTERN) {
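         /* With matching nonzero patterns the update reduces to BLAS axpy calls
            over the stored values of the diagonal (A) and off-diagonal (B) parts. */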
1376     PetscScalar alpha = a;
1377     xa = (Mat_SeqSBAIJ *)xx->A->data;
1378     ya = (Mat_SeqSBAIJ *)yy->A->data;
1379     bnz = PetscBLASIntCast(xa->nz);
1380     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1381     xb = (Mat_SeqBAIJ *)xx->B->data;
1382     yb = (Mat_SeqBAIJ *)yy->B->data;
1383     bnz = PetscBLASIntCast(xb->nz);
1384     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1385   } else {
1386     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1387     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1388     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1396 {
1397   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1398   PetscInt        rbs,cbs;
1399   PetscErrorCode  ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1403   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1404   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1405   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1406   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D, but the SBAIJ row layout already has block size %D",bs,rbs);
1407   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D, but the SBAIJ column layout already has block size %D",bs,cbs);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscTruth     flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {
1431        MatSetValues_MPISBAIJ,
1432        MatGetRow_MPISBAIJ,
1433        MatRestoreRow_MPISBAIJ,
1434        MatMult_MPISBAIJ,
1435 /* 4*/ MatMultAdd_MPISBAIJ,
1436        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1437        MatMultAdd_MPISBAIJ,
1438        0,
1439        0,
1440        0,
1441 /*10*/ 0,
1442        0,
1443        0,
1444        MatSOR_MPISBAIJ,
1445        MatTranspose_MPISBAIJ,
1446 /*15*/ MatGetInfo_MPISBAIJ,
1447        MatEqual_MPISBAIJ,
1448        MatGetDiagonal_MPISBAIJ,
1449        MatDiagonalScale_MPISBAIJ,
1450        MatNorm_MPISBAIJ,
1451 /*20*/ MatAssemblyBegin_MPISBAIJ,
1452        MatAssemblyEnd_MPISBAIJ,
1453        MatSetOption_MPISBAIJ,
1454        MatZeroEntries_MPISBAIJ,
1455 /*24*/ 0,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*34*/ MatDuplicate_MPISBAIJ,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*39*/ MatAXPY_MPISBAIJ,
1471        MatGetSubMatrices_MPISBAIJ,
1472        MatIncreaseOverlap_MPISBAIJ,
1473        MatGetValues_MPISBAIJ,
1474        MatCopy_MPISBAIJ,
1475 /*44*/ 0,
1476        MatScale_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480 /*49*/ MatSetBlockSize_MPISBAIJ,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*54*/ 0,
1486        0,
1487        MatSetUnfactored_MPISBAIJ,
1488        0,
1489        MatSetValuesBlocked_MPISBAIJ,
1490 /*59*/ 0,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*64*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*74*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*79*/ 0,
1511        0,
1512        0,
1513        0,
1514        MatLoad_MPISBAIJ,
1515 /*84*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*89*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*94*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*99*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*104*/0,
1536        MatRealPart_MPISBAIJ,
1537        MatImaginaryPart_MPISBAIJ,
1538        MatGetRowUpperTriangular_MPISBAIJ,
1539        MatRestoreRowUpperTriangular_MPISBAIJ
1540 };
1541 
1542 
1543 EXTERN_C_BEGIN
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1546 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1547 {
1548   PetscFunctionBegin;
1549   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1550   *iscopy = PETSC_FALSE;
1551   PetscFunctionReturn(0);
1552 }
1553 EXTERN_C_END
1554 
1555 EXTERN_C_BEGIN
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1558 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1559 {
1560   Mat_MPISBAIJ   *b;
1561   PetscErrorCode ierr;
1562   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1563 
1564   PetscFunctionBegin;
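       /* A negative bs (as passed by MatSetUpPreallocation_MPISBAIJ above) signals
          that the block size may still be overridden from the options database. */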
1565   if (bs < 0){
1566     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1567       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1568     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1569     bs   = PetscAbs(bs);
1570   }
1571   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1572   bs = newbs;
1573 
1574   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1575   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1576   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1577   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1578 
1579   B->rmap->bs = B->cmap->bs = bs;
1580   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1581   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1582 
1583   if (d_nnz) {
1584     for (i=0; i<B->rmap->n/bs; i++) {
1585       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1586     }
1587   }
1588   if (o_nnz) {
1589     for (i=0; i<B->rmap->n/bs; i++) {
1590       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1591     }
1592   }
1593 
1594   b   = (Mat_MPISBAIJ*)B->data;
1595   mbs = B->rmap->n/bs;
1596   Mbs = B->rmap->N/bs;
1597   if (mbs*bs != B->rmap->n) {
1598     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1599   }
1600 
1601   B->rmap->bs  = bs;
1602   b->bs2 = bs*bs;
1603   b->mbs = mbs;
1604   b->nbs = mbs;
1605   b->Mbs = Mbs;
1606   b->Nbs = Mbs;
1607 
1608   for (i=0; i<=b->size; i++) {
1609     b->rangebs[i] = B->rmap->range[i]/bs;
1610   }
1611   b->rstartbs = B->rmap->rstart/bs;
1612   b->rendbs   = B->rmap->rend/bs;
1613 
1614   b->cstartbs = B->cmap->rstart/bs;
1615   b->cendbs   = B->cmap->rend/bs;
1616 
1617   if (!B->preallocated) {
1618     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1619     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1620     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1621     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1622     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1623     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1624     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1625     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1626     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1627   }
1628 
1629   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1630   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1631   B->preallocated = PETSC_TRUE;
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #if defined(PETSC_HAVE_MUMPS)
1638 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1639 #endif
1640 #if defined(PETSC_HAVE_SPOOLES)
1641 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1642 #endif
1643 #if defined(PETSC_HAVE_PASTIX)
1644 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1645 #endif
1646 EXTERN_C_END
1647 
1648 /*MC
1649    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1650    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1651 
1652    Options Database Keys:
1653 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1654 
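        Notes:
        A minimal creation sketch (A, M, bs, d_nz and o_nz are illustrative placeholders):

     .vb
        MatCreate(PETSC_COMM_WORLD,&A);
        MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);
        MatSetType(A,MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     .ve
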
1655   Level: beginner
1656 
1657 .seealso: MatCreateMPISBAIJ
1658 M*/
1659 
1660 EXTERN_C_BEGIN
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatCreate_MPISBAIJ"
1663 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1664 {
1665   Mat_MPISBAIJ   *b;
1666   PetscErrorCode ierr;
1667   PetscTruth     flg;
1668 
1669   PetscFunctionBegin;
1670 
1671   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1672   B->data = (void*)b;
1673   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1674 
1675   B->ops->destroy    = MatDestroy_MPISBAIJ;
1676   B->ops->view       = MatView_MPISBAIJ;
1677   B->mapping         = 0;
1678   B->assembled       = PETSC_FALSE;
1679 
1680   B->insertmode = NOT_SET_VALUES;
1681   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1682   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1683 
1684   /* build local table of row and column ownerships */
1685   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1686 
1687   /* build cache for off array entries formed */
1688   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1689   b->donotstash  = PETSC_FALSE;
1690   b->colmap      = PETSC_NULL;
1691   b->garray      = PETSC_NULL;
1692   b->roworiented = PETSC_TRUE;
1693 
1694   /* stuff used in block assembly */
1695   b->barray       = 0;
1696 
1697   /* stuff used for matrix vector multiply */
1698   b->lvec         = 0;
1699   b->Mvctx        = 0;
1700   b->slvec0       = 0;
1701   b->slvec0b      = 0;
1702   b->slvec1       = 0;
1703   b->slvec1a      = 0;
1704   b->slvec1b      = 0;
1705   b->sMvctx       = 0;
1706 
1707   /* stuff for MatGetRow() */
1708   b->rowindices   = 0;
1709   b->rowvalues    = 0;
1710   b->getrowactive = PETSC_FALSE;
1711 
1712   /* hash table stuff */
1713   b->ht           = 0;
1714   b->hd           = 0;
1715   b->ht_size      = 0;
1716   b->ht_flag      = PETSC_FALSE;
1717   b->ht_fact      = 0;
1718   b->ht_total_ct  = 0;
1719   b->ht_insert_ct = 0;
1720 
1721   b->in_loc       = 0;
1722   b->v_loc        = 0;
1723   b->n_loc        = 0;
1724   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1725     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1726     if (flg) {
1727       PetscReal fact = 1.39;
1728       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1729       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1730       if (fact <= 1.0) fact = 1.39;
1731       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1732       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1733     }
1734   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1735 
1736 #if defined(PETSC_HAVE_PASTIX)
1737   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1738 					   "MatGetFactor_mpisbaij_pastix",
1739 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1740 #endif
1741 #if defined(PETSC_HAVE_MUMPS)
1742   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1743                                      "MatGetFactor_sbaij_mumps",
1744                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1745 #endif
1746 #if defined(PETSC_HAVE_SPOOLES)
1747   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1748                                      "MatGetFactor_mpisbaij_spooles",
1749                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1750 #endif
1751   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1752                                      "MatStoreValues_MPISBAIJ",
1753                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1755                                      "MatRetrieveValues_MPISBAIJ",
1756                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1758                                      "MatGetDiagonalBlock_MPISBAIJ",
1759                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1761                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1762                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1763   B->symmetric                  = PETSC_TRUE;
1764   B->structurally_symmetric     = PETSC_TRUE;
1765   B->symmetric_set              = PETSC_TRUE;
1766   B->structurally_symmetric_set = PETSC_TRUE;
1767   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 EXTERN_C_END
1771 
1772 /*MC
1773    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1774 
1775    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1776    and MATMPISBAIJ otherwise.
1777 
1778    Options Database Keys:
1779 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1780 
1781   Level: beginner
1782 
1783 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1784 M*/
1785 
1786 EXTERN_C_BEGIN
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatCreate_SBAIJ"
1789 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1790 {
1791   PetscErrorCode ierr;
1792   PetscMPIInt    size;
1793 
1794   PetscFunctionBegin;
1795   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1796   if (size == 1) {
1797     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1798   } else {
1799     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1800   }
1801   PetscFunctionReturn(0);
1802 }
1803 EXTERN_C_END
1804 
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1807 /*@C
1808    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1809    the user should preallocate the matrix storage by setting the parameters
1810    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1811    performance can be increased by more than a factor of 50.
1812 
1813    Collective on Mat
1814 
1815    Input Parameters:
1816 +  A - the matrix
1817 .  bs   - size of block
1818 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1819            submatrix  (same for all local rows)
1820 .  d_nnz - array containing the number of block nonzeros in the various block rows
1821            in the upper triangular and diagonal part of the diagonal portion of the local
1822            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1823            room for the diagonal entry even if it is zero.
1824 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1825            submatrix (same for all local rows).
1826 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1827            off-diagonal portion of the local submatrix (possibly different for
1828            each block row) or PETSC_NULL.
1829 
1830 
1831    Options Database Keys:
1832 .   -mat_no_unroll - uses code that does not unroll the loops in the
1833                      block calculations (much slower)
1834 .   -mat_block_size - size of the blocks to use
1835 
1836    Notes:
1837 
1838    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1839    then it must be used on all processors that share the object for that argument.
1840 
1841    If the *_nnz parameter is given then the *_nz parameter is ignored
1842 
1843    Storage Information:
1844    For a square global matrix we define each processor's diagonal portion
1845    to be its local rows and the corresponding columns (a square submatrix);
1846    each processor's off-diagonal portion encompasses the remainder of the
1847    local matrix (a rectangular submatrix).
1848 
1849    The user can specify preallocated storage for the diagonal part of
1850    the local submatrix with either d_nz or d_nnz (not both).  Set
1851    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1852    memory allocation.  Likewise, specify preallocated storage for the
1853    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1854 
1855    You can call MatGetInfo() to get information on how effective the preallocation was;
1856    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1857    You can also run with the option -info and look for messages with the string
1858    malloc in them to see if additional memory allocation was needed.
1859 
1860    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1861    the figure below we depict these three local rows and all columns (0-11).
1862 
1863 .vb
1864            0 1 2 3 4 5 6 7 8 9 10 11
1865           -------------------
1866    row 3  |  o o o d d d o o o o o o
1867    row 4  |  o o o d d d o o o o o o
1868    row 5  |  o o o d d d o o o o o o
1869           -------------------
1870 .ve
1871 
1872    Thus, any entries in the d locations are stored in the d (diagonal)
1873    submatrix, and any entries in the o locations are stored in the
1874    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1875    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1876 
1877    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1878    plus the diagonal part of the d matrix,
1879    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1880    In general, for PDE problems in which most nonzeros are near the diagonal,
1881    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1882    or you will get TERRIBLE performance; see the users' manual chapter on
1883    matrices.
1884 
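        As a sketch (the counts used here are illustrative, not prescriptive), preallocating
        three upper-triangular block nonzeros per block row in the d part and one block
        nonzero per block row in the o part might look like

     .vb
        MatCreate(comm,&A);
        MatSetSizes(A,m,m,M,M);
        MatSetType(A,MATMPISBAIJ);
        MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);
     .ve
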
1885    Level: intermediate
1886 
1887 .keywords: matrix, block, aij, compressed row, sparse, parallel
1888 
1889 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1890 @*/
1891 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1892 {
1893   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1894 
1895   PetscFunctionBegin;
1896   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1897   if (f) {
1898     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatCreateMPISBAIJ"
1905 /*@C
1906    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1907    (block compressed row).  For good matrix assembly performance
1908    the user should preallocate the matrix storage by setting the parameters
1909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1910    performance can be increased by more than a factor of 50.
1911 
1912    Collective on MPI_Comm
1913 
1914    Input Parameters:
1915 +  comm - MPI communicator
1916 .  bs   - size of block
1917 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1918            This value should be the same as the local size used in creating the
1919            y vector for the matrix-vector product y = Ax.
1920 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1921            This value should be the same as the local size used in creating the
1922            x vector for the matrix-vector product y = Ax.
1923 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1924 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1925 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1926            submatrix  (same for all local rows)
1927 .  d_nnz - array containing the number of block nonzeros in the various block rows
1928            in the upper triangular and diagonal part of the diagonal portion of the local
1929            submatrix (possibly different for each block row) or PETSC_NULL.
1930            You must leave room for the diagonal entry even if it is zero.
1931 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1932            submatrix (same for all local rows).
1933 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1934            off-diagonal portion of the local submatrix (possibly different for
1935            each block row) or PETSC_NULL.
1936 
1937    Output Parameter:
1938 .  A - the matrix
1939 
1940    Options Database Keys:
1941 .   -mat_no_unroll - uses code that does not unroll the loops in the
1942                      block calculations (much slower)
1943 .   -mat_block_size - size of the blocks to use
1944 .   -mat_mpi - use the parallel matrix data structures even on one processor
1945                (defaults to using SeqSBAIJ format on one processor)
1946 
1947    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1948    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1949    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
1950 
1951    Notes:
1952    The number of rows and columns must be divisible by blocksize.
1953    This matrix type does not support complex Hermitian operations.
1954 
1955    The user MUST specify either the local or global matrix dimensions
1956    (possibly both).
1957 
1958    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1959    then it must be used on all processors that share the object for that argument.
1960 
1961    If the *_nnz parameter is given then the *_nz parameter is ignored
1962 
1963    Storage Information:
1964    For a square global matrix we define each processor's diagonal portion
1965    to be its local rows and the corresponding columns (a square submatrix);
1966    each processor's off-diagonal portion encompasses the remainder of the
1967    local matrix (a rectangular submatrix).
1968 
1969    The user can specify preallocated storage for the diagonal part of
1970    the local submatrix with either d_nz or d_nnz (not both).  Set
1971    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1972    memory allocation.  Likewise, specify preallocated storage for the
1973    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1974 
1975    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1976    the figure below we depict these three local rows and all columns (0-11).
1977 
1978 .vb
1979            0 1 2 3 4 5 6 7 8 9 10 11
1980           -------------------
1981    row 3  |  o o o d d d o o o o o o
1982    row 4  |  o o o d d d o o o o o o
1983    row 5  |  o o o d d d o o o o o o
1984           -------------------
1985 .ve
1986 
1987    Thus, any entries in the d locations are stored in the d (diagonal)
1988    submatrix, and any entries in the o locations are stored in the
1989    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1990    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1991 
1992    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1993    plus the diagonal part of the d matrix,
1994    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1995    In general, for PDE problems in which most nonzeros are near the diagonal,
1996    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1997    or you will get TERRIBLE performance; see the users' manual chapter on
1998    matrices.
1999 
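        For instance, a hedged sketch of the direct-call form (all sizes are placeholders):

     .vb
        MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,
                          d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
     .ve
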
2000    Level: intermediate
2001 
2002 .keywords: matrix, block, aij, compressed row, sparse, parallel
2003 
2004 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2005 @*/
2007 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2008 {
2009   PetscErrorCode ierr;
2010   PetscMPIInt    size;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2014   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2016   if (size > 1) {
2017     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2018     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2019   } else {
2020     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2021     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2029 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2030 {
2031   Mat            mat;
2032   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2033   PetscErrorCode ierr;
2034   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2035   PetscScalar    *array;
2036 
2037   PetscFunctionBegin;
2038   *newmat       = 0;
2039   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2040   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2041   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2042   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2043   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2044   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2045 
2046   mat->factortype   = matin->factortype;
2047   mat->preallocated = PETSC_TRUE;
2048   mat->assembled    = PETSC_TRUE;
2049   mat->insertmode   = NOT_SET_VALUES;
2050 
2051   a = (Mat_MPISBAIJ*)mat->data;
2052   a->bs2   = oldmat->bs2;
2053   a->mbs   = oldmat->mbs;
2054   a->nbs   = oldmat->nbs;
2055   a->Mbs   = oldmat->Mbs;
2056   a->Nbs   = oldmat->Nbs;
2057 
2058 
2059   a->size         = oldmat->size;
2060   a->rank         = oldmat->rank;
2061   a->donotstash   = oldmat->donotstash;
2062   a->roworiented  = oldmat->roworiented;
2063   a->rowindices   = 0;
2064   a->rowvalues    = 0;
2065   a->getrowactive = PETSC_FALSE;
2066   a->barray       = 0;
2067   a->rstartbs    = oldmat->rstartbs;
2068   a->rendbs      = oldmat->rendbs;
2069   a->cstartbs    = oldmat->cstartbs;
2070   a->cendbs      = oldmat->cendbs;
2071 
2072   /* hash table stuff */
2073   a->ht           = 0;
2074   a->hd           = 0;
2075   a->ht_size      = 0;
2076   a->ht_flag      = oldmat->ht_flag;
2077   a->ht_fact      = oldmat->ht_fact;
2078   a->ht_total_ct  = 0;
2079   a->ht_insert_ct = 0;
2080 
2081   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2082   if (oldmat->colmap) {
2083 #if defined (PETSC_USE_CTABLE)
2084     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2085 #else
2086     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2087     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2088     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2089 #endif
2090   } else a->colmap = 0;
2091 
2092   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2093     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2094     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2095     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2096   } else a->garray = 0;
2097 
2098   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2099   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2100   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2101   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2102   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2103 
2104   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2105   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2106   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2107   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2108 
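       /* slvec1a/slvec1b (and slvec0b) are not independent storage: they are windows
          into the arrays of slvec1 and slvec0, the first bs*mbs entries forming the
          local part and the remainder the ghost part. */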
2109   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2110   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2111   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2113   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2114   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2115   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2116   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2122 
2123   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2124   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2125   a->sMvctx = oldmat->sMvctx;
2126   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2127 
2128   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2130   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2132   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2133   *newmat = mat;
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatLoad_MPISBAIJ"
2139 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2140 {
2141   Mat            A;
2142   PetscErrorCode ierr;
2143   PetscInt       i,nz,j,rstart,rend;
2144   PetscScalar    *vals,*buf;
2145   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2146   MPI_Status     status;
2147   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2148   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2149   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2150   PetscInt       bs=1,Mbs,mbs,extra_rows;
2151   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2152   PetscInt       dcount,kmax,k,nzcount,tmp;
2153   int            fd;
2154 
2155   PetscFunctionBegin;
2156   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2157     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2158   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2159 
2160   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2161   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2162   if (!rank) {
2163     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2164     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2165     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2166     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2167   }
2168 
2169   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2170   M = header[1]; N = header[2];
2171 
2172   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2173 
2174   /*
2175      This code adds extra rows to make sure the number of rows is
2176      divisible by the blocksize
2177   */
2178   Mbs        = M/bs;
2179   extra_rows = bs - M + bs*(Mbs);
2180   if (extra_rows == bs) extra_rows = 0;
2181   else                  Mbs++;
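       /* e.g. M = 10 with bs = 3: Mbs = 3 and extra_rows = 3 - 10 + 9 = 2, so two
          padded rows (given unit diagonal entries below) are appended and Mbs becomes 4 */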
2182   if (extra_rows && !rank) {
2183     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2184   }
2185 
2186   /* determine ownership of all rows */
2187   mbs        = Mbs/size + ((Mbs % size) > rank);
2188   m          = mbs*bs;
2189   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2190   mmbs       = PetscMPIIntCast(mbs);
2191   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2192   rowners[0] = 0;
2193   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2194   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2195   rstart = rowners[rank];
2196   rend   = rowners[rank+1];
2197 
2198   /* distribute row lengths to all processors */
2199   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2200   if (!rank) {
2201     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2202     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2203     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2204     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2205     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2206     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2207     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2208   } else {
2209     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2210   }
2211 
2212   if (!rank) {   /* procs[0] */
2213     /* calculate the number of nonzeros on each processor */
2214     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2215     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2216     for (i=0; i<size; i++) {
2217       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2218         procsnz[i] += rowlengths[j];
2219       }
2220     }
2221     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2222 
2223     /* determine max buffer needed and allocate it */
2224     maxnz = 0;
2225     for (i=0; i<size; i++) {
2226       maxnz = PetscMax(maxnz,procsnz[i]);
2227     }
2228     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2229 
2230     /* read in my part of the matrix column indices  */
2231     nz     = procsnz[0];
2232     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2233     mycols = ibuf;
2234     if (size == 1)  nz -= extra_rows;
2235     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2236     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2237 
2238     /* read in each remaining process's part (except the last) and ship it off */
2239     for (i=1; i<size-1; i++) {
2240       nz   = procsnz[i];
2241       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2242       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2243     }
2244     /* read in the stuff for the last proc */
2245     if (size != 1) {
2246       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2247       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2248       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2249       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2250     }
2251     ierr = PetscFree(cols);CHKERRQ(ierr);
2252   } else {  /* procs[i], i>0 */
2253     /* determine buffer space needed for message */
2254     nz = 0;
2255     for (i=0; i<m; i++) {
2256       nz += locrowlens[i];
2257     }
2258     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2259     mycols = ibuf;
2260     /* receive message of column indices*/
2261     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2262     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2263     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2264   }
2265 
2266   /* loop over local rows, determining number of off diagonal entries */
2267   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2268   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2269   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2270   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2271   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
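       /* mask[] flags block columns already counted within the current block row;
          masked1[]/masked2[] record which flags to clear afterwards (diagonal vs
          off-diagonal portion), avoiding an O(Mbs) reset of mask[] per block row */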
2272   rowcount = 0;
2273   nzcount  = 0;
2274   for (i=0; i<mbs; i++) {
2275     dcount  = 0;
2276     odcount = 0;
2277     for (j=0; j<bs; j++) {
2278       kmax = locrowlens[rowcount];
2279       for (k=0; k<kmax; k++) {
2280         tmp = mycols[nzcount++]/bs; /* block col. index */
2281         if (!mask[tmp]) {
2282           mask[tmp] = 1;
2283           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2284           else masked1[dcount++] = tmp; /* entry in diag portion */
2285         }
2286       }
2287       rowcount++;
2288     }
2289 
2290     dlens[i]  = dcount;  /* d_nzz[i] */
2291     odlens[i] = odcount; /* o_nzz[i] */
2292 
2293     /* zero out the mask elements we set */
2294     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2295     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2296   }
2297 
2298   /* create our matrix */
2299   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2300   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2301   ierr = MatSetType(A,type);CHKERRQ(ierr);
2302   ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2303   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2304 
2305   if (!rank) {
2306     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2307     /* read in my part of the matrix numerical values  */
2308     nz = procsnz[0];
2309     vals = buf;
2310     mycols = ibuf;
2311     if (size == 1)  nz -= extra_rows;
2312     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2313     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2314 
2315     /* insert into matrix */
2316     jj      = rstart*bs;
2317     for (i=0; i<m; i++) {
2318       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2319       mycols += locrowlens[i];
2320       vals   += locrowlens[i];
2321       jj++;
2322     }
2323 
2324     /* read in other processors (except the last one) and ship out */
2325     for (i=1; i<size-1; i++) {
2326       nz   = procsnz[i];
2327       vals = buf;
2328       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2329       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2330     }
2331     /* the last proc */
2332     if (size != 1){
2333       nz   = procsnz[size-1] - extra_rows;
2334       vals = buf;
2335       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2336       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2337       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2338     }
2339     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2340 
2341   } else {
2342     /* receive numeric values */
2343     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2344 
2345     /* receive message of values*/
2346     vals   = buf;
2347     mycols = ibuf;
2348     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2349     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2350     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2351 
2352     /* insert into matrix */
2353     jj      = rstart*bs;
2354     for (i=0; i<m; i++) {
2355       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2356       mycols += locrowlens[i];
2357       vals   += locrowlens[i];
2358       jj++;
2359     }
2360   }
2361 
2362   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2363   ierr = PetscFree(buf);CHKERRQ(ierr);
2364   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2365   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2366   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2367   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2368   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   *newmat = A;
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2376 /*XXXXX@
2377    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2378 
2379    Input Parameters:
2380 .  mat  - the matrix
2381 .  fact - factor
2382 
2383    Collective on Mat
2384 
2385    Level: advanced
2386 
2387   Notes:
2388    This can also be set by the command line option: -mat_use_hash_table fact
2389 
2390 .keywords: matrix, hashtable, factor, HT
2391 
2392 .seealso: MatSetOption()
2393 @XXXXX*/
2394 
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2398 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2399 {
2400   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2401   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2402   PetscReal      atmp;
2403   PetscReal      *work,*svalues,*rvalues;
2404   PetscErrorCode ierr;
2405   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2406   PetscMPIInt    rank,size;
2407   PetscInt       *rowners_bs,dest,count,source;
2408   PetscScalar    *va;
2409   MatScalar      *ba;
2410   MPI_Status     stat;
2411 
2412   PetscFunctionBegin;
2413   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2414   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2415   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2416 
2417   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2418   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2419 
2420   bs   = A->rmap->bs;
2421   mbs  = a->mbs;
2422   Mbs  = a->Mbs;
2423   ba   = b->a;
2424   bi   = b->i;
2425   bj   = b->j;
2426 
2427   /* find ownerships */
2428   rowners_bs = A->rmap->range;
2429 
2430   /* each proc creates an array to be distributed */
2431   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2432   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
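       /* Since only the upper triangle is stored, entries of B on this process also
          contribute to the row maxima of rows owned by higher-ranked processes; those
          contributions are accumulated in work[] and shipped to their owners below */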
2433 
2434   /* row_max for B */
2435   if (rank != size-1){
2436     for (i=0; i<mbs; i++) {
2437       ncols = bi[1] - bi[0]; bi++;
2438       brow  = bs*i;
2439       for (j=0; j<ncols; j++){
2440         bcol = bs*(*bj);
2441         for (kcol=0; kcol<bs; kcol++){
2442           col = bcol + kcol;                 /* local col index */
2443           col += rowners_bs[rank+1];      /* global col index */
2444           for (krow=0; krow<bs; krow++){
2445             atmp = PetscAbsScalar(*ba); ba++;
2446             row = brow + krow;    /* local row index */
2447             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2448             if (work[col] < atmp) work[col] = atmp;
2449           }
2450         }
2451         bj++;
2452       }
2453     }
2454 
2455     /* send values to its owners */
2456     for (dest=rank+1; dest<size; dest++){
2457       svalues = work + rowners_bs[dest];
2458       count   = rowners_bs[dest+1]-rowners_bs[dest];
2459       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2460     }
2461   }
2462 
2463   /* receive values */
2464   if (rank){
2465     rvalues = work;
2466     count   = rowners_bs[rank+1]-rowners_bs[rank];
2467     for (source=0; source<rank; source++){
2468       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2469       /* process values */
2470       for (i=0; i<count; i++){
2471         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2472       }
2473     }
2474   }
2475 
2476   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2477   ierr = PetscFree(work);CHKERRQ(ierr);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatSOR_MPISBAIJ"
2483 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2484 {
2485   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2486   PetscErrorCode    ierr;
2487   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2488   PetscScalar       *x,*ptr,*from;
2489   Vec               bb1;
2490   const PetscScalar *b;
2491 
2492   PetscFunctionBegin;
2493   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2494   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2495 
2496   if (flag == SOR_APPLY_UPPER) {
2497     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2498     PetscFunctionReturn(0);
2499   }
2500 
2501   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2502     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2503       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2504       its--;
2505     }
2506 
2507     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2508     while (its--){
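           /* Each sweep assembles bb1 = bb - B^T*x - B*x_ghost using one merged
              scatter (sMvctx) on the stacked vectors slvec0 = -[x; B^T*x] and
              slvec1 = [bb; 0], then applies a local symmetric sweep with A */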
2509 
2510       /* lower triangular part: slvec0b = - B^T*xx */
2511       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2512 
2513       /* copy xx into slvec0a */
2514       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2515       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2516       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2517       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2518 
2519       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2520 
2521       /* copy bb into slvec1a */
2522       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2523       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2524       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2525       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2526 
2527       /* set slvec1b = 0 */
2528       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2529 
2530       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2531       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2532       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2533       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2534 
2535       /* upper triangular part: bb1 = slvec1a + B*slvec1b (= bb - B^T*x - B*x_ghost) */
2536       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2537 
2538       /* local diagonal sweep */
2539       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2540     }
2541     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2542   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2543     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2544   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2545     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2546   } else if (flag & SOR_EISENSTAT) {
2547     Vec               xx1;
2548     PetscTruth        hasop;
2549     const PetscScalar *diag;
2550     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2551     PetscInt          i,n;
2552 
2553     if (!mat->xx1) {
2554       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2555       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2556     }
2557     xx1 = mat->xx1;
2558     bb1 = mat->bb1;
2559 
2560     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2561 
2562     if (!mat->diag) {
2563       /* this is wrong for same matrix with new nonzero values */
2564       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2565       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2566     }
2567     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2568 
2569     if (hasop) {
2570       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
2571       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2572     } else {
2573       /*
2574           These two lines are replaced by code that may be a bit faster for a good compiler
2575       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2576       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2577       */
2578       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2579       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2580       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2581       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2582       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2583       if (omega == 1.0) {
2584         for (i=0; i<n; i++) {
2585           sl[i] = b[i] - diag[i]*x[i];
2586         }
2587         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2588       } else {
2589         for (i=0; i<n; i++) {
2590           sl[i] = b[i] + scale*diag[i]*x[i];
2591         }
2592         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2593       }
2594       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2595       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2596       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2597       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2598     }
2599 
2600     /* multiply off-diagonal portion of matrix */
2601     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2602     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2603     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2604     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2605     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2606     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2607     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2608     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2609     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2610     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2611 
2612     /* local sweep */
2613     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2614     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2615   } else {
2616     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2617   }
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2623 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2624 {
2625   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2626   PetscErrorCode ierr;
2627   Vec            lvec1,bb1;
2628 
2629   PetscFunctionBegin;
2630   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2631   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2632 
2633   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2634     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2635       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2636       its--;
2637     }
2638 
2639     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2640     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2641     while (its--){
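           /* This two-communication variant uses the plain Mvctx scatter twice per
              sweep: a reverse ADD scatter folds the -B^T*x contributions (lvec1) into
              bb1, while a forward scatter gathers ghost x values for the B*x term */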
2642       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2643 
2644       /* lower triangular part: bb1 = bb - B^T*xx */
2645       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2646       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2647 
2648       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2649       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2650       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2651 
2652       /* upper triangular part: bb1 = bb1 - B*x_ghost */
2653       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2654       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2655 
2656       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2657 
2658       /* diagonal sweep */
2659       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2660     }
2661     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2662     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2663   } else {
2664     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2665   }
2666   PetscFunctionReturn(0);
2667 }
2668 
2669