xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fac7e3cdc257fbfc3a17cb177c9fb3b514c77af2)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
5 #include "../src/mat/impls/sbaij/seq/sbaij.h"
6 #include "petscblaslapack.h"
7 
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
11 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
12 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
14 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
17 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
22 EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 EXTERN_C_BEGIN
25 #undef __FUNCT__
26 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
27 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
28 {
29   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
30   PetscErrorCode ierr;
31 
32   PetscFunctionBegin;
33   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
34   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
35   PetscFunctionReturn(0);
36 }
37 EXTERN_C_END
38 
39 EXTERN_C_BEGIN
40 #undef __FUNCT__
41 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
42 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
43 {
44   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
45   PetscErrorCode ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
49   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 
55 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
56 { \
57  \
58     brow = row/bs;  \
59     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
60     rmax = aimax[brow]; nrow = ailen[brow]; \
61       bcol = col/bs; \
62       ridx = row % bs; cidx = col % bs; \
63       low = 0; high = nrow; \
64       while (high-low > 3) { \
65         t = (low+high)/2; \
66         if (rp[t] > bcol) high = t; \
67         else              low  = t; \
68       } \
69       for (_i=low; _i<high; _i++) { \
70         if (rp[_i] > bcol) break; \
71         if (rp[_i] == bcol) { \
72           bap  = ap +  bs2*_i + bs*cidx + ridx; \
73           if (addv == ADD_VALUES) *bap += value;  \
74           else                    *bap  = value;  \
75           goto a_noinsert; \
76         } \
77       } \
78       if (a->nonew == 1) goto a_noinsert; \
79       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
80       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
81       N = nrow++ - 1;  \
82       /* shift up all the later entries in this row */ \
83       for (ii=N; ii>=_i; ii--) { \
84         rp[ii+1] = rp[ii]; \
85         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
86       } \
87       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
88       rp[_i]                      = bcol;  \
89       ap[bs2*_i + bs*cidx + ridx] = value;  \
90       a_noinsert:; \
91     ailen[brow] = nrow; \
92 }
93 
94 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
95 { \
96     brow = row/bs;  \
97     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
98     rmax = bimax[brow]; nrow = bilen[brow]; \
99       bcol = col/bs; \
100       ridx = row % bs; cidx = col % bs; \
101       low = 0; high = nrow; \
102       while (high-low > 3) { \
103         t = (low+high)/2; \
104         if (rp[t] > bcol) high = t; \
105         else              low  = t; \
106       } \
107       for (_i=low; _i<high; _i++) { \
108         if (rp[_i] > bcol) break; \
109         if (rp[_i] == bcol) { \
110           bap  = ap +  bs2*_i + bs*cidx + ridx; \
111           if (addv == ADD_VALUES) *bap += value;  \
112           else                    *bap  = value;  \
113           goto b_noinsert; \
114         } \
115       } \
116       if (b->nonew == 1) goto b_noinsert; \
117       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
118       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
119       N = nrow++ - 1;  \
120       /* shift up all the later entries in this row */ \
121       for (ii=N; ii>=_i; ii--) { \
122         rp[ii+1] = rp[ii]; \
123         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
124       } \
125       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
126       rp[_i]                      = bcol;  \
127       ap[bs2*_i + bs*cidx + ridx] = value;  \
128       b_noinsert:; \
129     bilen[brow] = nrow; \
130 }
131 
132 /* Only add/insert a(i,j) with i<=j (blocks).
133    Any a(i,j) with i>j input by user is ingored.
134 */
135 #undef __FUNCT__
136 #define __FUNCT__ "MatSetValues_MPISBAIJ"
137 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
138 {
139   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
140   MatScalar      value;
141   PetscTruth     roworiented = baij->roworiented;
142   PetscErrorCode ierr;
143   PetscInt       i,j,row,col;
144   PetscInt       rstart_orig=mat->rmap->rstart;
145   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
146   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
147 
148   /* Some Variables required in the macro */
149   Mat            A = baij->A;
150   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
151   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
152   MatScalar      *aa=a->a;
153 
154   Mat            B = baij->B;
155   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
156   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
157   MatScalar     *ba=b->a;
158 
159   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
160   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
161   MatScalar     *ap,*bap;
162 
163   /* for stash */
164   PetscInt      n_loc, *in_loc = PETSC_NULL;
165   MatScalar     *v_loc = PETSC_NULL;
166 
167   PetscFunctionBegin;
168   if (v) PetscValidScalarPointer(v,6);
169   if (!baij->donotstash){
170     if (n > baij->n_loc) {
171       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
172       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
175       baij->n_loc = n;
176     }
177     in_loc = baij->in_loc;
178     v_loc  = baij->v_loc;
179   }
180 
181   for (i=0; i<m; i++) {
182     if (im[i] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
185 #endif
186     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
187       row = im[i] - rstart_orig;              /* local row index */
188       for (j=0; j<n; j++) {
189         if (im[i]/bs > in[j]/bs){
190           if (a->ignore_ltriangular){
191             continue;    /* ignore lower triangular blocks */
192           } else {
193             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
194           }
195         }
196         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
197           col = in[j] - cstart_orig;          /* local col index */
198           brow = row/bs; bcol = col/bs;
199           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
200           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
201           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
202           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
203         } else if (in[j] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
206 #endif
207         else {  /* off-diag entry (B) */
208           if (mat->was_assembled) {
209             if (!baij->colmap) {
210               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
211             }
212 #if defined (PETSC_USE_CTABLE)
213             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
214             col  = col - 1;
215 #else
216             col = baij->colmap[in[j]/bs] - 1;
217 #endif
218             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
219               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
220               col =  in[j];
221               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
222               B = baij->B;
223               b = (Mat_SeqBAIJ*)(B)->data;
224               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
225               ba=b->a;
226             } else col += in[j]%bs;
227           } else col = in[j];
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         }
232       }
233     } else {  /* off processor entry */
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if(!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If NumCol = 1 then a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -=bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (!baij->donotstash) {
349         if (roworiented) {
350           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         } else {
352           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         }
354       }
355     }
356   }
357   PetscFunctionReturn(0);
358 }
359 
360 #undef __FUNCT__
361 #define __FUNCT__ "MatGetValues_MPISBAIJ"
362 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
363 {
364   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
365   PetscErrorCode ierr;
366   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
367   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
368 
369   PetscFunctionBegin;
370   for (i=0; i<m; i++) {
371     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
372     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
373     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
374       row = idxm[i] - bsrstart;
375       for (j=0; j<n; j++) {
376         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
377         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
378         if (idxn[j] >= bscstart && idxn[j] < bscend){
379           col = idxn[j] - bscstart;
380           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
381         } else {
382           if (!baij->colmap) {
383             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
384           }
385 #if defined (PETSC_USE_CTABLE)
386           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
387           data --;
388 #else
389           data = baij->colmap[idxn[j]/bs]-1;
390 #endif
391           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
392           else {
393             col  = data + idxn[j]%bs;
394             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
395           }
396         }
397       }
398     } else {
399       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
400     }
401   }
402  PetscFunctionReturn(0);
403 }
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatNorm_MPISBAIJ"
407 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
408 {
409   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
410   PetscErrorCode ierr;
411   PetscReal      sum[2],*lnorm2;
412 
413   PetscFunctionBegin;
414   if (baij->size == 1) {
415     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
416   } else {
417     if (type == NORM_FROBENIUS) {
418       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
419       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
420       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
421       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
423       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
424       *norm = sqrt(sum[0] + 2*sum[1]);
425       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
426     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
427       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
428       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
429       PetscReal    *rsum,*rsum2,vabs;
430       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
431       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
432       MatScalar    *v;
433 
434       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
435       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
436       /* Amat */
437       v = amat->a; jj = amat->j;
438       for (brow=0; brow<mbs; brow++) {
439         grow = bs*(rstart + brow);
440         nz = amat->i[brow+1] - amat->i[brow];
441         for (bcol=0; bcol<nz; bcol++){
442           gcol = bs*(rstart + *jj); jj++;
443           for (col=0; col<bs; col++){
444             for (row=0; row<bs; row++){
445               vabs = PetscAbsScalar(*v); v++;
446               rsum[gcol+col] += vabs;
447               /* non-diagonal block */
448               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
449             }
450           }
451         }
452       }
453       /* Bmat */
454       v = bmat->a; jj = bmat->j;
455       for (brow=0; brow<mbs; brow++) {
456         grow = bs*(rstart + brow);
457         nz = bmat->i[brow+1] - bmat->i[brow];
458         for (bcol=0; bcol<nz; bcol++){
459           gcol = bs*garray[*jj]; jj++;
460           for (col=0; col<bs; col++){
461             for (row=0; row<bs; row++){
462               vabs = PetscAbsScalar(*v); v++;
463               rsum[gcol+col] += vabs;
464               rsum[grow+row] += vabs;
465             }
466           }
467         }
468       }
469       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
470       *norm = 0.0;
471       for (col=0; col<mat->cmap->N; col++) {
472         if (rsum2[col] > *norm) *norm = rsum2[col];
473       }
474       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
475     } else {
476       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
477     }
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
485 {
486   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
487   PetscErrorCode ierr;
488   PetscInt       nstash,reallocs;
489   InsertMode     addv;
490 
491   PetscFunctionBegin;
492   if (baij->donotstash) {
493     PetscFunctionReturn(0);
494   }
495 
496   /* make sure all processors are either in INSERTMODE or ADDMODE */
497   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
498   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscTruth     other_disassembled;
520   PetscMPIInt    n;
521   PetscTruth     r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527 
528   if (!baij->donotstash) {
529     while (1) {
530       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
531       if (!flg) break;
532 
533       for (i=0; i<n;) {
534         /* Now identify the consecutive vals belonging to the same row */
535         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
536         if (j < n) ncols = j-i;
537         else       ncols = n-i;
538         /* Now assemble all these values with a single function call */
539         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
540         i = j;
541       }
542     }
543     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
544     /* Now process the block-stash. Since the values are stashed column-oriented,
545        set the roworiented flag to column oriented, and after MatSetValues()
546        restore the original flags */
547     r1 = baij->roworiented;
548     r2 = a->roworiented;
549     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
550     baij->roworiented = PETSC_FALSE;
551     a->roworiented    = PETSC_FALSE;
552     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
553     while (1) {
554       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
555       if (!flg) break;
556 
557       for (i=0; i<n;) {
558         /* Now identify the consecutive vals belonging to the same row */
559         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
570   }
571 
572   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any processor has disassembled, if so we must
576      also disassemble ourselfs, in order that we may reassemble. */
577   /*
578      if nonzero structure of submatrix B cannot change then we know that
579      no processor disassembled thus we can skip this stuff
580   */
581   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587 
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
590   }
591   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
596   baij->rowvalues = 0;
597 
598   PetscFunctionReturn(0);
599 }
600 
601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
602 #undef __FUNCT__
603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
605 {
606   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
607   PetscErrorCode    ierr;
608   PetscInt          bs = mat->rmap->bs;
609   PetscMPIInt       size = baij->size,rank = baij->rank;
610   PetscTruth        iascii,isdraw;
611   PetscViewer       sviewer;
612   PetscViewerFormat format;
613 
614   PetscFunctionBegin;
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
617   if (iascii) {
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
620       MatInfo info;
621       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
622       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
623       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
624               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
625               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
632       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
638       PetscFunctionReturn(0);
639     }
640   }
641 
642   if (isdraw) {
643     PetscDraw  draw;
644     PetscTruth isnull;
645     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
646     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
647   }
648 
649   if (size == 1) {
650     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
651     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
652   } else {
653     /* assemble the entire matrix onto first processor. */
654     Mat          A;
655     Mat_SeqSBAIJ *Aloc;
656     Mat_SeqBAIJ  *Bloc;
657     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
658     MatScalar    *a;
659 
660     /* Should this be the same type as mat? */
661     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
662     if (!rank) {
663       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
664     } else {
665       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
666     }
667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
668     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to call to draw the matrix since the graphics waits are
707        synchronized across all processors that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     ierr = MatDestroy(A);CHKERRQ(ierr);
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatView_MPISBAIJ"
722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
723 {
724   PetscErrorCode ierr;
725   PetscTruth     iascii,isdraw,issocket,isbinary;
726 
727   PetscFunctionBegin;
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
732   if (iascii || isdraw || issocket || isbinary) {
733     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPISBAIJ"
742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
743 {
744   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
757 #else
758   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
761   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
762   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
763   if (baij->slvec0) {
764     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
765     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
766   }
767   if (baij->slvec1) {
768     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
770     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
771   }
772   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
773   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
774   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
775   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
776   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
777   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
778   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
779 #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
780   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
781 #endif
782   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
785   ierr = PetscFree(baij);CHKERRQ(ierr);
786 
787   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
798 {
799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
800   PetscErrorCode ierr;
801   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
802   PetscScalar    *x,*from;
803 
804   PetscFunctionBegin;
805   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
806   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
807 
808   /* diagonal part */
809   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
810   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
811 
812   /* subdiagonal part */
813   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
814 
815   /* copy x into the vec slvec0 */
816   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
817   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
818 
819   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
820   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
821   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
822 
823   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   /* supperdiagonal part */
826   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatMult_MPISBAIJ"
832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
833 {
834   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
835   PetscErrorCode ierr;
836   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
837   PetscScalar    *x,*from;
838 
839   PetscFunctionBegin;
840   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
841   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
842 
843   /* diagonal part */
844   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
845   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
846 
847   /* subdiagonal part */
848   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
849 
850   /* copy x into the vec slvec0 */
851   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
852   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
853 
854   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
855   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
856   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
857 
858   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860   /* supperdiagonal part */
861   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
868 {
869   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
870   PetscErrorCode ierr;
871   PetscInt       nt;
872 
873   PetscFunctionBegin;
874   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
875   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
876 
877   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
878   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
879 
880   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   /* do diagonal part */
882   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
883   /* do supperdiagonal part */
884   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
886   /* do subdiagonal part */
887   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
888   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
889   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
890 
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
897 {
898   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
899   PetscErrorCode ierr;
900   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
901   PetscScalar    *x,*from,zero=0.0;
902 
903   PetscFunctionBegin;
904   /*
905   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
906   PetscSynchronizedFlush(((PetscObject)A)->comm);
907   */
908   /* diagonal part */
909   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
910   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
911 
912   /* subdiagonal part */
913   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
914 
915   /* copy x into the vec slvec0 */
916   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
917   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
918   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
919   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
920 
921   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
923   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924 
925   /* supperdiagonal part */
926   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
927 
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
934 {
935   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   /* do diagonal part */
941   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
942   /* do supperdiagonal part */
943   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
945 
946   /* do subdiagonal part */
947   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
948   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
949   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950 
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955   This only works correctly for square matrices where the subblock A->A is the
956    diagonal block
957 */
958 #undef __FUNCT__
959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
961 {
962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
967   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatScale_MPISBAIJ"
973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
980   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatGetRow_MPISBAIJ"
986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
987 {
988   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
989   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
990   PetscErrorCode ierr;
991   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
992   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
993   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     PetscInt     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) { max = tmp; }
1009     }
1010     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1011   }
1012 
1013   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1014   lrow = row - brstart;  /* local row index */
1015 
1016   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1017   if (!v)   {pvA = 0; pvB = 0;}
1018   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1019   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1020   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1021   nztot = nzA + nzB;
1022 
1023   cmap  = mat->garray;
1024   if (v  || idx) {
1025     if (nztot) {
1026       /* Sort by increasing column numbers, assuming A and B already sorted */
1027       PetscInt imark = -1;
1028       if (v) {
1029         *v = v_p = mat->rowvalues;
1030         for (i=0; i<nzB; i++) {
1031           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1032           else break;
1033         }
1034         imark = i;
1035         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1036         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1037       }
1038       if (idx) {
1039         *idx = idx_p = mat->rowindices;
1040         if (imark > -1) {
1041           for (i=0; i<imark; i++) {
1042             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043           }
1044         } else {
1045           for (i=0; i<nzB; i++) {
1046             if (cmap[cworkB[i]/bs] < cstart)
1047               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1048             else break;
1049           }
1050           imark = i;
1051         }
1052         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1053         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1054       }
1055     } else {
1056       if (idx) *idx = 0;
1057       if (v)   *v   = 0;
1058     }
1059   }
1060   *nz = nztot;
1061   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1062   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1069 {
1070   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1071 
1072   PetscFunctionBegin;
1073   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1074   baij->getrowactive = PETSC_FALSE;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1081 {
1082   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1083   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1084 
1085   PetscFunctionBegin;
1086   aA->getrow_utriangular = PETSC_TRUE;
1087   PetscFunctionReturn(0);
1088 }
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1092 {
1093   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1094   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1095 
1096   PetscFunctionBegin;
1097   aA->getrow_utriangular = PETSC_FALSE;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1110   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1117 {
1118   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1123   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1130 {
1131   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1136   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1143 {
1144   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1145   Mat            A = a->A,B = a->B;
1146   PetscErrorCode ierr;
1147   PetscReal      isend[5],irecv[5];
1148 
1149   PetscFunctionBegin;
1150   info->block_size     = (PetscReal)matin->rmap->bs;
1151   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1152   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1153   isend[3] = info->memory;  isend[4] = info->mallocs;
1154   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1156   isend[3] += info->memory;  isend[4] += info->mallocs;
1157   if (flag == MAT_LOCAL) {
1158     info->nz_used      = isend[0];
1159     info->nz_allocated = isend[1];
1160     info->nz_unneeded  = isend[2];
1161     info->memory       = isend[3];
1162     info->mallocs      = isend[4];
1163   } else if (flag == MAT_GLOBAL_MAX) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   } else if (flag == MAT_GLOBAL_SUM) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else {
1178     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1179   }
1180   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1181   info->fill_ratio_needed = 0;
1182   info->factor_mallocs    = 0;
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1189 {
1190   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1191   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   switch (op) {
1196   case MAT_NEW_NONZERO_LOCATIONS:
1197   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1198   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1199   case MAT_KEEP_NONZERO_PATTERN:
1200   case MAT_NEW_NONZERO_LOCATION_ERR:
1201     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROW_ORIENTED:
1205     a->roworiented = flg;
1206     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1208     break;
1209   case MAT_NEW_DIAGONALS:
1210     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1211     break;
1212   case MAT_IGNORE_OFF_PROC_ENTRIES:
1213     a->donotstash = flg;
1214     break;
1215   case MAT_USE_HASH_TABLE:
1216     a->ht_flag = flg;
1217     break;
1218   case MAT_HERMITIAN:
1219     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1220     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1221     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1222     break;
1223   case MAT_SYMMETRIC:
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225     break;
1226   case MAT_STRUCTURALLY_SYMMETRIC:
1227     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1228     break;
1229   case MAT_SYMMETRY_ETERNAL:
1230     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1231     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1232     break;
1233   case MAT_IGNORE_LOWER_TRIANGULAR:
1234     aA->ignore_ltriangular = flg;
1235     break;
1236   case MAT_ERROR_LOWER_TRIANGULAR:
1237     aA->ignore_ltriangular = flg;
1238     break;
1239   case MAT_GETROW_UPPERTRIANGULAR:
1240     aA->getrow_utriangular = flg;
1241     break;
1242   default:
1243     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1244   }
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1250 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1251 {
1252   PetscErrorCode ierr;
1253   PetscFunctionBegin;
1254   if (MAT_INITIAL_MATRIX || *B != A) {
1255     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1262 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1263 {
1264   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1265   Mat            a=baij->A, b=baij->B;
1266   PetscErrorCode ierr;
1267   PetscInt       nv,m,n;
1268   PetscTruth     flg;
1269 
1270   PetscFunctionBegin;
1271   if (ll != rr){
1272     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1273     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1274   }
1275   if (!ll) PetscFunctionReturn(0);
1276 
1277   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1278   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1279 
1280   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1281   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1282 
1283   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1284 
1285   /* left diagonalscale the off-diagonal part */
1286   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1287 
1288   /* scale the diagonal part */
1289   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1290 
1291   /* right diagonalscale the off-diagonal part */
1292   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1299 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1300 {
1301   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303 
1304   PetscFunctionBegin;
1305   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatEqual_MPISBAIJ"
1313 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1314 {
1315   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1316   Mat            a,b,c,d;
1317   PetscTruth     flg;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   a = matA->A; b = matA->B;
1322   c = matB->A; d = matB->B;
1323 
1324   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1325   if (flg) {
1326     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1327   }
1328   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatCopy_MPISBAIJ"
1334 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1335 {
1336   PetscErrorCode ierr;
1337   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1338   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1339 
1340   PetscFunctionBegin;
1341   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1342   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1343     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1344     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1345     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1346   } else {
1347     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1348     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1355 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1356 {
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1366 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1367 {
1368   PetscErrorCode ierr;
1369   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1370   PetscBLASInt   bnz,one=1;
1371   Mat_SeqSBAIJ   *xa,*ya;
1372   Mat_SeqBAIJ    *xb,*yb;
1373 
1374   PetscFunctionBegin;
1375   if (str == SAME_NONZERO_PATTERN) {
1376     PetscScalar alpha = a;
1377     xa = (Mat_SeqSBAIJ *)xx->A->data;
1378     ya = (Mat_SeqSBAIJ *)yy->A->data;
1379     bnz = PetscBLASIntCast(xa->nz);
1380     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1381     xb = (Mat_SeqBAIJ *)xx->B->data;
1382     yb = (Mat_SeqBAIJ *)yy->B->data;
1383     bnz = PetscBLASIntCast(xb->nz);
1384     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1385   } else {
1386     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1387     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1388     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1389   }
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1395 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1396 {
1397   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1398   PetscInt        rbs,cbs;
1399   PetscErrorCode  ierr;
1400 
1401   PetscFunctionBegin;
1402   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1403   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1404   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1405   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1406   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1407   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscTruth     flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {
1431        MatSetValues_MPISBAIJ,
1432        MatGetRow_MPISBAIJ,
1433        MatRestoreRow_MPISBAIJ,
1434        MatMult_MPISBAIJ,
1435 /* 4*/ MatMultAdd_MPISBAIJ,
1436        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1437        MatMultAdd_MPISBAIJ,
1438        0,
1439        0,
1440        0,
1441 /*10*/ 0,
1442        0,
1443        0,
1444        MatSOR_MPISBAIJ,
1445        MatTranspose_MPISBAIJ,
1446 /*15*/ MatGetInfo_MPISBAIJ,
1447        MatEqual_MPISBAIJ,
1448        MatGetDiagonal_MPISBAIJ,
1449        MatDiagonalScale_MPISBAIJ,
1450        MatNorm_MPISBAIJ,
1451 /*20*/ MatAssemblyBegin_MPISBAIJ,
1452        MatAssemblyEnd_MPISBAIJ,
1453        MatSetOption_MPISBAIJ,
1454        MatZeroEntries_MPISBAIJ,
1455 /*24*/ 0,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*34*/ MatDuplicate_MPISBAIJ,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*39*/ MatAXPY_MPISBAIJ,
1471        MatGetSubMatrices_MPISBAIJ,
1472        MatIncreaseOverlap_MPISBAIJ,
1473        MatGetValues_MPISBAIJ,
1474        MatCopy_MPISBAIJ,
1475 /*44*/ 0,
1476        MatScale_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480 /*49*/ MatSetBlockSize_MPISBAIJ,
1481        0,
1482        0,
1483        0,
1484        0,
1485 /*54*/ 0,
1486        0,
1487        MatSetUnfactored_MPISBAIJ,
1488        0,
1489        MatSetValuesBlocked_MPISBAIJ,
1490 /*59*/ 0,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*64*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*74*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*79*/ 0,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*84*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*89*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*94*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*99*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*104*/0,
1536        MatRealPart_MPISBAIJ,
1537        MatImaginaryPart_MPISBAIJ,
1538        MatGetRowUpperTriangular_MPISBAIJ,
1539        MatRestoreRowUpperTriangular_MPISBAIJ,
1540 /*109*/0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*114*/0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*119*/0,
1551        0,
1552        0,
1553        0,
1554        MatLoadnew_MPISBAIJ
1555 };
1556 
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1561 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1562 {
1563   PetscFunctionBegin;
1564   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1565   *iscopy = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1574 {
1575   Mat_MPISBAIJ   *b;
1576   PetscErrorCode ierr;
1577   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1578 
1579   PetscFunctionBegin;
1580   if (bs < 0){
1581     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1582       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1583     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1584     bs   = PetscAbs(bs);
1585   }
1586   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1587   bs = newbs;
1588 
1589   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1590   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1591   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1592   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1593 
1594   B->rmap->bs = B->cmap->bs = bs;
1595   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1596   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1597 
1598   if (d_nnz) {
1599     for (i=0; i<B->rmap->n/bs; i++) {
1600       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1601     }
1602   }
1603   if (o_nnz) {
1604     for (i=0; i<B->rmap->n/bs; i++) {
1605       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1606     }
1607   }
1608 
1609   b   = (Mat_MPISBAIJ*)B->data;
1610   mbs = B->rmap->n/bs;
1611   Mbs = B->rmap->N/bs;
1612   if (mbs*bs != B->rmap->n) {
1613     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1614   }
1615 
1616   B->rmap->bs  = bs;
1617   b->bs2 = bs*bs;
1618   b->mbs = mbs;
1619   b->nbs = mbs;
1620   b->Mbs = Mbs;
1621   b->Nbs = Mbs;
1622 
1623   for (i=0; i<=b->size; i++) {
1624     b->rangebs[i] = B->rmap->range[i]/bs;
1625   }
1626   b->rstartbs = B->rmap->rstart/bs;
1627   b->rendbs   = B->rmap->rend/bs;
1628 
1629   b->cstartbs = B->cmap->rstart/bs;
1630   b->cendbs   = B->cmap->rend/bs;
1631 
1632   if (!B->preallocated) {
1633     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1634     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1635     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1636     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1637     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1638     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1639     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1640     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1641     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1642   }
1643 
1644   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1645   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1646   B->preallocated = PETSC_TRUE;
1647   PetscFunctionReturn(0);
1648 }
1649 EXTERN_C_END
1650 
1651 EXTERN_C_BEGIN
1652 #if defined(PETSC_HAVE_MUMPS)
1653 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1654 #endif
1655 #if defined(PETSC_HAVE_SPOOLES)
1656 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1657 #endif
1658 #if defined(PETSC_HAVE_PASTIX)
1659 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1660 #endif
1661 EXTERN_C_END
1662 
1663 /*MC
1664    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1665    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1666 
1667    Options Database Keys:
1668 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1669 
1670   Level: beginner
1671 
1672 .seealso: MatCreateMPISBAIJ
1673 M*/
1674 
1675 EXTERN_C_BEGIN
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatCreate_MPISBAIJ"
1678 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1679 {
1680   Mat_MPISBAIJ   *b;
1681   PetscErrorCode ierr;
1682   PetscTruth     flg;
1683 
1684   PetscFunctionBegin;
1685 
1686   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1687   B->data = (void*)b;
1688   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1689 
1690   B->ops->destroy    = MatDestroy_MPISBAIJ;
1691   B->ops->view       = MatView_MPISBAIJ;
1692   B->mapping         = 0;
1693   B->assembled       = PETSC_FALSE;
1694 
1695   B->insertmode = NOT_SET_VALUES;
1696   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1697   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1698 
1699   /* build local table of row and column ownerships */
1700   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1701 
1702   /* build cache for off array entries formed */
1703   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1704   b->donotstash  = PETSC_FALSE;
1705   b->colmap      = PETSC_NULL;
1706   b->garray      = PETSC_NULL;
1707   b->roworiented = PETSC_TRUE;
1708 
1709   /* stuff used in block assembly */
1710   b->barray       = 0;
1711 
1712   /* stuff used for matrix vector multiply */
1713   b->lvec         = 0;
1714   b->Mvctx        = 0;
1715   b->slvec0       = 0;
1716   b->slvec0b      = 0;
1717   b->slvec1       = 0;
1718   b->slvec1a      = 0;
1719   b->slvec1b      = 0;
1720   b->sMvctx       = 0;
1721 
1722   /* stuff for MatGetRow() */
1723   b->rowindices   = 0;
1724   b->rowvalues    = 0;
1725   b->getrowactive = PETSC_FALSE;
1726 
1727   /* hash table stuff */
1728   b->ht           = 0;
1729   b->hd           = 0;
1730   b->ht_size      = 0;
1731   b->ht_flag      = PETSC_FALSE;
1732   b->ht_fact      = 0;
1733   b->ht_total_ct  = 0;
1734   b->ht_insert_ct = 0;
1735 
1736   b->in_loc       = 0;
1737   b->v_loc        = 0;
1738   b->n_loc        = 0;
1739   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1740     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1741     if (flg) {
1742       PetscReal fact = 1.39;
1743       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1744       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1745       if (fact <= 1.0) fact = 1.39;
1746       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1747       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1748     }
1749   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1750 
1751 #if defined(PETSC_HAVE_PASTIX)
1752   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1753 					   "MatGetFactor_mpisbaij_pastix",
1754 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1755 #endif
1756 #if defined(PETSC_HAVE_MUMPS)
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1758                                      "MatGetFactor_sbaij_mumps",
1759                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1760 #endif
1761 #if defined(PETSC_HAVE_SPOOLES)
1762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1763                                      "MatGetFactor_mpisbaij_spooles",
1764                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1765 #endif
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1767                                      "MatStoreValues_MPISBAIJ",
1768                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1770                                      "MatRetrieveValues_MPISBAIJ",
1771                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1773                                      "MatGetDiagonalBlock_MPISBAIJ",
1774                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1776                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1777                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1778   B->symmetric                  = PETSC_TRUE;
1779   B->structurally_symmetric     = PETSC_TRUE;
1780   B->symmetric_set              = PETSC_TRUE;
1781   B->structurally_symmetric_set = PETSC_TRUE;
1782   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 EXTERN_C_END
1786 
1787 /*MC
1788    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1789 
1790    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1791    and MATMPISBAIJ otherwise.
1792 
1793    Options Database Keys:
1794 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1795 
1796   Level: beginner
1797 
1798 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1799 M*/
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatCreate_SBAIJ"
1804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1805 {
1806   PetscErrorCode ierr;
1807   PetscMPIInt    size;
1808 
1809   PetscFunctionBegin;
1810   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1811   if (size == 1) {
1812     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1822 /*@C
1823    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on Mat
1829 
1830    Input Parameters:
1831 +  A - the matrix
1832 .  bs   - size of blockk
1833 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1834            submatrix  (same for all local rows)
1835 .  d_nnz - array containing the number of block nonzeros in the various block rows
1836            in the upper triangular and diagonal part of the in diagonal portion of the local
1837            (possibly different for each block row) or PETSC_NULL.  You must leave room
1838            for the diagonal entry even if it is zero.
1839 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1840            submatrix (same for all local rows).
1841 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1842            off-diagonal portion of the local submatrix (possibly different for
1843            each block row) or PETSC_NULL.
1844 
1845 
1846    Options Database Keys:
1847 .   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 .   -mat_block_size - size of the blocks to use
1850 
1851    Notes:
1852 
1853    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1854    than it must be used on all processors that share the object for that argument.
1855 
1856    If the *_nnz parameter is given then the *_nz parameter is ignored
1857 
1858    Storage Information:
1859    For a square global matrix we define each processor's diagonal portion
1860    to be its local rows and the corresponding columns (a square submatrix);
1861    each processor's off-diagonal portion encompasses the remainder of the
1862    local matrix (a rectangular submatrix).
1863 
1864    The user can specify preallocated storage for the diagonal part of
1865    the local submatrix with either d_nz or d_nnz (not both).  Set
1866    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1867    memory allocation.  Likewise, specify preallocated storage for the
1868    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1869 
1870    You can call MatGetInfo() to get information on how effective the preallocation was;
1871    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1872    You can also run with the option -info and look for messages with the string
1873    malloc in them to see if additional memory allocation was needed.
1874 
1875    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1876    the figure below we depict these three local rows and all columns (0-11).
1877 
1878 .vb
1879            0 1 2 3 4 5 6 7 8 9 10 11
1880           -------------------
1881    row 3  |  o o o d d d o o o o o o
1882    row 4  |  o o o d d d o o o o o o
1883    row 5  |  o o o d d d o o o o o o
1884           -------------------
1885 .ve
1886 
1887    Thus, any entries in the d locations are stored in the d (diagonal)
1888    submatrix, and any entries in the o locations are stored in the
1889    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1890    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1891 
1892    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1893    plus the diagonal part of the d matrix,
1894    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1895    In general, for PDE problems in which most nonzeros are near the diagonal,
1896    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1897    or you will get TERRIBLE performance; see the users' manual chapter on
1898    matrices.
1899 
1900    Level: intermediate
1901 
1902 .keywords: matrix, block, aij, compressed row, sparse, parallel
1903 
1904 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1905 @*/
1906 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1907 {
1908   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1909 
1910   PetscFunctionBegin;
1911   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1912   if (f) {
1913     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatCreateMPISBAIJ"
1920 /*@C
1921    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1922    (block compressed row).  For good matrix assembly performance
1923    the user should preallocate the matrix storage by setting the parameters
1924    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1925    performance can be increased by more than a factor of 50.
1926 
1927    Collective on MPI_Comm
1928 
1929    Input Parameters:
1930 +  comm - MPI communicator
1931 .  bs   - size of blockk
1932 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1933            This value should be the same as the local size used in creating the
1934            y vector for the matrix-vector product y = Ax.
1935 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1936            This value should be the same as the local size used in creating the
1937            x vector for the matrix-vector product y = Ax.
1938 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1939 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1940 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1941            submatrix  (same for all local rows)
1942 .  d_nnz - array containing the number of block nonzeros in the various block rows
1943            in the upper triangular portion of the in diagonal portion of the local
1944            (possibly different for each block block row) or PETSC_NULL.
1945            You must leave room for the diagonal entry even if it is zero.
1946 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1947            submatrix (same for all local rows).
1948 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1949            off-diagonal portion of the local submatrix (possibly different for
1950            each block row) or PETSC_NULL.
1951 
1952    Output Parameter:
1953 .  A - the matrix
1954 
1955    Options Database Keys:
1956 .   -mat_no_unroll - uses code that does not unroll the loops in the
1957                      block calculations (much slower)
1958 .   -mat_block_size - size of the blocks to use
1959 .   -mat_mpi - use the parallel matrix data structures even on one processor
1960                (defaults to using SeqBAIJ format on one processor)
1961 
1962    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1963    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1964    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1965 
1966    Notes:
1967    The number of rows and columns must be divisible by blocksize.
1968    This matrix type does not support complex Hermitian operation.
1969 
1970    The user MUST specify either the local or global matrix dimensions
1971    (possibly both).
1972 
1973    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1974    than it must be used on all processors that share the object for that argument.
1975 
1976    If the *_nnz parameter is given then the *_nz parameter is ignored
1977 
1978    Storage Information:
1979    For a square global matrix we define each processor's diagonal portion
1980    to be its local rows and the corresponding columns (a square submatrix);
1981    each processor's off-diagonal portion encompasses the remainder of the
1982    local matrix (a rectangular submatrix).
1983 
1984    The user can specify preallocated storage for the diagonal part of
1985    the local submatrix with either d_nz or d_nnz (not both).  Set
1986    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1987    memory allocation.  Likewise, specify preallocated storage for the
1988    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1989 
1990    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1991    the figure below we depict these three local rows and all columns (0-11).
1992 
1993 .vb
1994            0 1 2 3 4 5 6 7 8 9 10 11
1995           -------------------
1996    row 3  |  o o o d d d o o o o o o
1997    row 4  |  o o o d d d o o o o o o
1998    row 5  |  o o o d d d o o o o o o
1999           -------------------
2000 .ve
2001 
2002    Thus, any entries in the d locations are stored in the d (diagonal)
2003    submatrix, and any entries in the o locations are stored in the
2004    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2005    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2006 
2007    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2008    plus the diagonal part of the d matrix,
2009    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2010    In general, for PDE problems in which most nonzeros are near the diagonal,
2011    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2012    or you will get TERRIBLE performance; see the users' manual chapter on
2013    matrices.
2014 
2015    Level: intermediate
2016 
2017 .keywords: matrix, block, aij, compressed row, sparse, parallel
2018 
2019 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2020 @*/
2021 
2022 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2023 {
2024   PetscErrorCode ierr;
2025   PetscMPIInt    size;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2029   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2031   if (size > 1) {
2032     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2033     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2034   } else {
2035     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2036     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2037   }
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2044 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2045 {
2046   Mat            mat;
2047   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2048   PetscErrorCode ierr;
2049   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2050   PetscScalar    *array;
2051 
2052   PetscFunctionBegin;
2053   *newmat       = 0;
2054   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2055   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2056   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2057   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2058   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2059   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2060 
2061   mat->factortype   = matin->factortype;
2062   mat->preallocated = PETSC_TRUE;
2063   mat->assembled    = PETSC_TRUE;
2064   mat->insertmode   = NOT_SET_VALUES;
2065 
2066   a = (Mat_MPISBAIJ*)mat->data;
2067   a->bs2   = oldmat->bs2;
2068   a->mbs   = oldmat->mbs;
2069   a->nbs   = oldmat->nbs;
2070   a->Mbs   = oldmat->Mbs;
2071   a->Nbs   = oldmat->Nbs;
2072 
2073 
2074   a->size         = oldmat->size;
2075   a->rank         = oldmat->rank;
2076   a->donotstash   = oldmat->donotstash;
2077   a->roworiented  = oldmat->roworiented;
2078   a->rowindices   = 0;
2079   a->rowvalues    = 0;
2080   a->getrowactive = PETSC_FALSE;
2081   a->barray       = 0;
2082   a->rstartbs    = oldmat->rstartbs;
2083   a->rendbs      = oldmat->rendbs;
2084   a->cstartbs    = oldmat->cstartbs;
2085   a->cendbs      = oldmat->cendbs;
2086 
2087   /* hash table stuff */
2088   a->ht           = 0;
2089   a->hd           = 0;
2090   a->ht_size      = 0;
2091   a->ht_flag      = oldmat->ht_flag;
2092   a->ht_fact      = oldmat->ht_fact;
2093   a->ht_total_ct  = 0;
2094   a->ht_insert_ct = 0;
2095 
2096   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2097   if (oldmat->colmap) {
2098 #if defined (PETSC_USE_CTABLE)
2099     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2100 #else
2101     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2102     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2103     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2104 #endif
2105   } else a->colmap = 0;
2106 
2107   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2108     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2109     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2110     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2111   } else a->garray = 0;
2112 
2113   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2114   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2116   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2118 
2119   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2121   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2123 
2124   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2125   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2127   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2128   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2137 
2138   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2139   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2140   a->sMvctx = oldmat->sMvctx;
2141   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2142 
2143   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2145   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2147   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2148   *newmat = mat;
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatLoadnew_MPISBAIJ"
2154 PetscErrorCode MatLoadnew_MPISBAIJ(PetscViewer viewer, Mat newmat)
2155 {
2156   PetscErrorCode ierr;
2157   PetscInt       i,nz,j,rstart,rend;
2158   PetscScalar    *vals,*buf;
2159   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2160   MPI_Status     status;
2161   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2162   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2163   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2164   PetscInt       bs=1,Mbs,mbs,extra_rows;
2165   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2166   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2167   int            fd;
2168 
2169   PetscFunctionBegin;
2170   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2171     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2172   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2173 
2174   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2175   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2176   if (!rank) {
2177     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2178     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2179     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2180     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2181   }
2182 
2183   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2184 
2185   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2186   M = header[1]; N = header[2];
2187 
2188   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2189   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2190   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2191 
2192   /* If global sizes are set, check if they are consistent with that given in the file */
2193   if (sizesset) {
2194     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2195   }
2196   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2197   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Incostintent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2198 
2199   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2200 
2201   /*
2202      This code adds extra rows to make sure the number of rows is
2203      divisible by the blocksize
2204   */
2205   Mbs        = M/bs;
2206   extra_rows = bs - M + bs*(Mbs);
2207   if (extra_rows == bs) extra_rows = 0;
2208   else                  Mbs++;
2209   if (extra_rows &&!rank) {
2210     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2211   }
2212 
2213   /* determine ownership of all rows */
2214   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2215     mbs        = Mbs/size + ((Mbs % size) > rank);
2216     m          = mbs*bs;
2217   } else { /* User Set */
2218     m          = newmat->rmap->n;
2219     mbs        = m/bs;
2220   }
2221   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2222   mmbs       = PetscMPIIntCast(mbs);
2223   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2224   rowners[0] = 0;
2225   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2226   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2227   rstart = rowners[rank];
2228   rend   = rowners[rank+1];
2229 
2230   /* distribute row lengths to all processors */
2231   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2232   if (!rank) {
2233     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2234     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2235     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2236     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2237     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2238     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2239     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2240   } else {
2241     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2242   }
2243 
2244   if (!rank) {   /* procs[0] */
2245     /* calculate the number of nonzeros on each processor */
2246     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2247     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2248     for (i=0; i<size; i++) {
2249       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2250         procsnz[i] += rowlengths[j];
2251       }
2252     }
2253     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2254 
2255     /* determine max buffer needed and allocate it */
2256     maxnz = 0;
2257     for (i=0; i<size; i++) {
2258       maxnz = PetscMax(maxnz,procsnz[i]);
2259     }
2260     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2261 
2262     /* read in my part of the matrix column indices  */
2263     nz     = procsnz[0];
2264     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2265     mycols = ibuf;
2266     if (size == 1)  nz -= extra_rows;
2267     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2268     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2269 
2270     /* read in every ones (except the last) and ship off */
2271     for (i=1; i<size-1; i++) {
2272       nz   = procsnz[i];
2273       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2274       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2275     }
2276     /* read in the stuff for the last proc */
2277     if (size != 1) {
2278       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2279       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2280       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2281       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2282     }
2283     ierr = PetscFree(cols);CHKERRQ(ierr);
2284   } else {  /* procs[i], i>0 */
2285     /* determine buffer space needed for message */
2286     nz = 0;
2287     for (i=0; i<m; i++) {
2288       nz += locrowlens[i];
2289     }
2290     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2291     mycols = ibuf;
2292     /* receive message of column indices*/
2293     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2294     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2295     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2296   }
2297 
2298   /* loop over local rows, determining number of off diagonal entries */
2299   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2300   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2301   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2302   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2303   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2304   rowcount = 0;
2305   nzcount  = 0;
2306   for (i=0; i<mbs; i++) {
2307     dcount  = 0;
2308     odcount = 0;
2309     for (j=0; j<bs; j++) {
2310       kmax = locrowlens[rowcount];
2311       for (k=0; k<kmax; k++) {
2312         tmp = mycols[nzcount++]/bs; /* block col. index */
2313         if (!mask[tmp]) {
2314           mask[tmp] = 1;
2315           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2316           else masked1[dcount++] = tmp; /* entry in diag portion */
2317         }
2318       }
2319       rowcount++;
2320     }
2321 
2322     dlens[i]  = dcount;  /* d_nzz[i] */
2323     odlens[i] = odcount; /* o_nzz[i] */
2324 
2325     /* zero out the mask elements we set */
2326     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2327     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2328   }
2329     if (!sizesset) {
2330     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2331   }
2332   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2333   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2334 
2335   if (!rank) {
2336     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2337     /* read in my part of the matrix numerical values  */
2338     nz = procsnz[0];
2339     vals = buf;
2340     mycols = ibuf;
2341     if (size == 1)  nz -= extra_rows;
2342     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2343     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2344 
2345     /* insert into matrix */
2346     jj      = rstart*bs;
2347     for (i=0; i<m; i++) {
2348       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2349       mycols += locrowlens[i];
2350       vals   += locrowlens[i];
2351       jj++;
2352     }
2353 
2354     /* read in other processors (except the last one) and ship out */
2355     for (i=1; i<size-1; i++) {
2356       nz   = procsnz[i];
2357       vals = buf;
2358       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2359       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2360     }
2361     /* the last proc */
2362     if (size != 1){
2363       nz   = procsnz[i] - extra_rows;
2364       vals = buf;
2365       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2366       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2367       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2368     }
2369     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2370 
2371   } else {
2372     /* receive numeric values */
2373     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2374 
2375     /* receive message of values*/
2376     vals   = buf;
2377     mycols = ibuf;
2378     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2379     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2380     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2381 
2382     /* insert into matrix */
2383     jj      = rstart*bs;
2384     for (i=0; i<m; i++) {
2385       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2386       mycols += locrowlens[i];
2387       vals   += locrowlens[i];
2388       jj++;
2389     }
2390   }
2391 
2392   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2393   ierr = PetscFree(buf);CHKERRQ(ierr);
2394   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2395   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2396   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2397   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2398   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2399   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2400   PetscFunctionReturn(0);
2401 }
2402 
2403 #undef __FUNCT__
2404 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2405 /*XXXXX@
2406    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2407 
2408    Input Parameters:
2409 .  mat  - the matrix
2410 .  fact - factor
2411 
2412    Collective on Mat
2413 
2414    Level: advanced
2415 
2416   Notes:
2417    This can also be set by the command line option: -mat_use_hash_table fact
2418 
2419 .keywords: matrix, hashtable, factor, HT
2420 
2421 .seealso: MatSetOption()
2422 @XXXXX*/
2423 
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2427 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2428 {
2429   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2430   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2431   PetscReal      atmp;
2432   PetscReal      *work,*svalues,*rvalues;
2433   PetscErrorCode ierr;
2434   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2435   PetscMPIInt    rank,size;
2436   PetscInt       *rowners_bs,dest,count,source;
2437   PetscScalar    *va;
2438   MatScalar      *ba;
2439   MPI_Status     stat;
2440 
2441   PetscFunctionBegin;
2442   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2443   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2444   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2445 
2446   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2447   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2448 
2449   bs   = A->rmap->bs;
2450   mbs  = a->mbs;
2451   Mbs  = a->Mbs;
2452   ba   = b->a;
2453   bi   = b->i;
2454   bj   = b->j;
2455 
2456   /* find ownerships */
2457   rowners_bs = A->rmap->range;
2458 
2459   /* each proc creates an array to be distributed */
2460   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2461   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2462 
2463   /* row_max for B */
2464   if (rank != size-1){
2465     for (i=0; i<mbs; i++) {
2466       ncols = bi[1] - bi[0]; bi++;
2467       brow  = bs*i;
2468       for (j=0; j<ncols; j++){
2469         bcol = bs*(*bj);
2470         for (kcol=0; kcol<bs; kcol++){
2471           col = bcol + kcol;                 /* local col index */
2472           col += rowners_bs[rank+1];      /* global col index */
2473           for (krow=0; krow<bs; krow++){
2474             atmp = PetscAbsScalar(*ba); ba++;
2475             row = brow + krow;    /* local row index */
2476             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2477             if (work[col] < atmp) work[col] = atmp;
2478           }
2479         }
2480         bj++;
2481       }
2482     }
2483 
2484     /* send values to its owners */
2485     for (dest=rank+1; dest<size; dest++){
2486       svalues = work + rowners_bs[dest];
2487       count   = rowners_bs[dest+1]-rowners_bs[dest];
2488       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2489     }
2490   }
2491 
2492   /* receive values */
2493   if (rank){
2494     rvalues = work;
2495     count   = rowners_bs[rank+1]-rowners_bs[rank];
2496     for (source=0; source<rank; source++){
2497       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2498       /* process values */
2499       for (i=0; i<count; i++){
2500         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2501       }
2502     }
2503   }
2504 
2505   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2506   ierr = PetscFree(work);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatSOR_MPISBAIJ"
2512 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2513 {
2514   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2515   PetscErrorCode    ierr;
2516   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2517   PetscScalar       *x,*ptr,*from;
2518   Vec               bb1;
2519   const PetscScalar *b;
2520 
2521   PetscFunctionBegin;
2522   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2523   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2524 
2525   if (flag == SOR_APPLY_UPPER) {
2526     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2527     PetscFunctionReturn(0);
2528   }
2529 
2530   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2531     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2532       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2533       its--;
2534     }
2535 
2536     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2537     while (its--){
2538 
2539       /* lower triangular part: slvec0b = - B^T*xx */
2540       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2541 
2542       /* copy xx into slvec0a */
2543       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2544       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2545       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2546       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2547 
2548       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2549 
2550       /* copy bb into slvec1a */
2551       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2552       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2553       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2554       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2555 
2556       /* set slvec1b = 0 */
2557       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2558 
2559       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2560       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2561       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2562       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2563 
2564       /* upper triangular part: bb1 = bb1 - B*x */
2565       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2566 
2567       /* local diagonal sweep */
2568       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2569     }
2570     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2571   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2572     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2573   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2574     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2575   } else if (flag & SOR_EISENSTAT) {
2576     Vec               xx1;
2577     PetscTruth        hasop;
2578     const PetscScalar *diag;
2579     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2580     PetscInt          i,n;
2581 
2582     if (!mat->xx1) {
2583       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2584       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2585     }
2586     xx1 = mat->xx1;
2587     bb1 = mat->bb1;
2588 
2589     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2590 
2591     if (!mat->diag) {
2592       /* this is wrong for same matrix with new nonzero values */
2593       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2594       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2595     }
2596     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2597 
2598     if (hasop) {
2599       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2600       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2601     } else {
2602       /*
2603           These two lines are replaced by code that may be a bit faster for a good compiler
2604       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2605       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2606       */
2607       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2608       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2609       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2610       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2611       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2612       if (omega == 1.0) {
2613 	for (i=0; i<n; i++) {
2614 	  sl[i] = b[i] - diag[i]*x[i];
2615 	}
2616         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2617       } else {
2618 	for (i=0; i<n; i++) {
2619 	  sl[i] = b[i] + scale*diag[i]*x[i];
2620 	}
2621         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2622       }
2623       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2624       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2625       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2626       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2627     }
2628 
2629     /* multiply off-diagonal portion of matrix */
2630     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2631     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2632     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2633     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2634     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2635     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2636     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2637     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2638     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2639     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2640 
2641     /* local sweep */
2642     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2643     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2644   } else {
2645     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2646   }
2647   PetscFunctionReturn(0);
2648 }
2649 
2650 #undef __FUNCT__
2651 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2652 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2653 {
2654   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2655   PetscErrorCode ierr;
2656   Vec            lvec1,bb1;
2657 
2658   PetscFunctionBegin;
2659   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2660   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2661 
2662   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2663     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2664       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2665       its--;
2666     }
2667 
2668     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2669     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2670     while (its--){
2671       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2672 
2673       /* lower diagonal part: bb1 = bb - B^T*xx */
2674       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2675       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2676 
2677       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2678       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2679       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2680 
2681       /* upper diagonal part: bb1 = bb1 - B*x */
2682       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2683       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2684 
2685       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2686 
2687       /* diagonal sweep */
2688       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2689     }
2690     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2691     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2692   } else {
2693     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2694   }
2695   PetscFunctionReturn(0);
2696 }
2697 
2698