xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4b4eb8d3b5c67aff3d3d02420f547e6671225154)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
9 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
10 {
11   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
12   PetscErrorCode ierr;
13 
14   PetscFunctionBegin;
15   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
16   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
22 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
23 {
24   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
29   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 
33 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
34   { \
35  \
36     brow = row/bs;  \
37     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
38     rmax = aimax[brow]; nrow = ailen[brow]; \
39     bcol = col/bs; \
40     ridx = row % bs; cidx = col % bs; \
41     low  = 0; high = nrow; \
42     while (high-low > 3) { \
43       t = (low+high)/2; \
44       if (rp[t] > bcol) high = t; \
45       else              low  = t; \
46     } \
47     for (_i=low; _i<high; _i++) { \
48       if (rp[_i] > bcol) break; \
49       if (rp[_i] == bcol) { \
50         bap = ap + bs2*_i + bs*cidx + ridx; \
51         if (addv == ADD_VALUES) *bap += value;  \
52         else                    *bap  = value;  \
53         goto a_noinsert; \
54       } \
55     } \
56     if (a->nonew == 1) goto a_noinsert; \
57     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
59     N = nrow++ - 1;  \
60     /* shift up all the later entries in this row */ \
61     for (ii=N; ii>=_i; ii--) { \
62       rp[ii+1] = rp[ii]; \
63       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
64     } \
65     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
66     rp[_i]                      = bcol;  \
67     ap[bs2*_i + bs*cidx + ridx] = value;  \
68     A->nonzerostate++;\
69 a_noinsert:; \
70     ailen[brow] = nrow; \
71   }
72 
73 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
74   { \
75     brow = row/bs;  \
76     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
77     rmax = bimax[brow]; nrow = bilen[brow]; \
78     bcol = col/bs; \
79     ridx = row % bs; cidx = col % bs; \
80     low  = 0; high = nrow; \
81     while (high-low > 3) { \
82       t = (low+high)/2; \
83       if (rp[t] > bcol) high = t; \
84       else              low  = t; \
85     } \
86     for (_i=low; _i<high; _i++) { \
87       if (rp[_i] > bcol) break; \
88       if (rp[_i] == bcol) { \
89         bap = ap + bs2*_i + bs*cidx + ridx; \
90         if (addv == ADD_VALUES) *bap += value;  \
91         else                    *bap  = value;  \
92         goto b_noinsert; \
93       } \
94     } \
95     if (b->nonew == 1) goto b_noinsert; \
96     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
97     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
98     N = nrow++ - 1;  \
99     /* shift up all the later entries in this row */ \
100     for (ii=N; ii>=_i; ii--) { \
101       rp[ii+1] = rp[ii]; \
102       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
103     } \
104     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
105     rp[_i]                      = bcol;  \
106     ap[bs2*_i + bs*cidx + ridx] = value;  \
107     B->nonzerostate++;\
108 b_noinsert:; \
109     bilen[brow] = nrow; \
110   }
111 
112 /* Only add/insert a(i,j) with i<=j (blocks).
113    Any a(i,j) with i>j input by user is ingored.
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "MatSetValues_MPISBAIJ"
117 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
118 {
119   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
120   MatScalar      value;
121   PetscBool      roworiented = baij->roworiented;
122   PetscErrorCode ierr;
123   PetscInt       i,j,row,col;
124   PetscInt       rstart_orig=mat->rmap->rstart;
125   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
126   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
127 
128   /* Some Variables required in the macro */
129   Mat          A     = baij->A;
130   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
131   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
132   MatScalar    *aa   =a->a;
133 
134   Mat         B     = baij->B;
135   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
136   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
137   MatScalar   *ba   =b->a;
138 
139   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
140   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
141   MatScalar *ap,*bap;
142 
143   /* for stash */
144   PetscInt  n_loc, *in_loc = NULL;
145   MatScalar *v_loc = NULL;
146 
147   PetscFunctionBegin;
148   if (!baij->donotstash) {
149     if (n > baij->n_loc) {
150       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
151       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
152       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
153       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
154 
155       baij->n_loc = n;
156     }
157     in_loc = baij->in_loc;
158     v_loc  = baij->v_loc;
159   }
160 
161   for (i=0; i<m; i++) {
162     if (im[i] < 0) continue;
163 #if defined(PETSC_USE_DEBUG)
164     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
165 #endif
166     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
167       row = im[i] - rstart_orig;              /* local row index */
168       for (j=0; j<n; j++) {
169         if (im[i]/bs > in[j]/bs) {
170           if (a->ignore_ltriangular) {
171             continue;    /* ignore lower triangular blocks */
172           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
173         }
174         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
175           col  = in[j] - cstart_orig;         /* local col index */
176           brow = row/bs; bcol = col/bs;
177           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
178           if (roworiented) value = v[i*n+j];
179           else             value = v[i+j*m];
180           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
181           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
182         } else if (in[j] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
185 #endif
186         else {  /* off-diag entry (B) */
187           if (mat->was_assembled) {
188             if (!baij->colmap) {
189               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
190             }
191 #if defined(PETSC_USE_CTABLE)
192             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
193             col  = col - 1;
194 #else
195             col = baij->colmap[in[j]/bs] - 1;
196 #endif
197             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
198               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
199               col  =  in[j];
200               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
201               B    = baij->B;
202               b    = (Mat_SeqBAIJ*)(B)->data;
203               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
204               ba   = b->a;
205             } else col += in[j]%bs;
206           } else col = in[j];
207           if (roworiented) value = v[i*n+j];
208           else             value = v[i+j*m];
209           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
210           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
211         }
212       }
213     } else {  /* off processor entry */
214       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
215       if (!baij->donotstash) {
216         mat->assembled = PETSC_FALSE;
217         n_loc          = 0;
218         for (j=0; j<n; j++) {
219           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
220           in_loc[n_loc] = in[j];
221           if (roworiented) {
222             v_loc[n_loc] = v[i*n+j];
223           } else {
224             v_loc[n_loc] = v[j*m+i];
225           }
226           n_loc++;
227         }
228         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
229       }
230     }
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
237 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
238 {
239   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
240   const MatScalar *value;
241   MatScalar       *barray     =baij->barray;
242   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
243   PetscErrorCode  ierr;
244   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
245   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
246   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
247 
248   PetscFunctionBegin;
249   if (!barray) {
250     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
251     baij->barray = barray;
252   }
253 
254   if (roworiented) {
255     stepval = (n-1)*bs;
256   } else {
257     stepval = (m-1)*bs;
258   }
259   for (i=0; i<m; i++) {
260     if (im[i] < 0) continue;
261 #if defined(PETSC_USE_DEBUG)
262     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
263 #endif
264     if (im[i] >= rstart && im[i] < rend) {
265       row = im[i] - rstart;
266       for (j=0; j<n; j++) {
267         if (im[i] > in[j]) {
268           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
269           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
270         }
271         /* If NumCol = 1 then a copy is not required */
272         if ((roworiented) && (n == 1)) {
273           barray = (MatScalar*) v + i*bs2;
274         } else if ((!roworiented) && (m == 1)) {
275           barray = (MatScalar*) v + j*bs2;
276         } else { /* Here a copy is required */
277           if (roworiented) {
278             value = v + i*(stepval+bs)*bs + j*bs;
279           } else {
280             value = v + j*(stepval+bs)*bs + i*bs;
281           }
282           for (ii=0; ii<bs; ii++,value+=stepval) {
283             for (jj=0; jj<bs; jj++) {
284               *barray++ = *value++;
285             }
286           }
287           barray -=bs2;
288         }
289 
290         if (in[j] >= cstart && in[j] < cend) {
291           col  = in[j] - cstart;
292           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
293         } else if (in[j] < 0) continue;
294 #if defined(PETSC_USE_DEBUG)
295         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
296 #endif
297         else {
298           if (mat->was_assembled) {
299             if (!baij->colmap) {
300               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
301             }
302 
303 #if defined(PETSC_USE_DEBUG)
304 #if defined(PETSC_USE_CTABLE)
305             { PetscInt data;
306               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
307               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
308             }
309 #else
310             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
311 #endif
312 #endif
313 #if defined(PETSC_USE_CTABLE)
314             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
315             col  = (col - 1)/bs;
316 #else
317             col = (baij->colmap[in[j]] - 1)/bs;
318 #endif
319             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
320               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
321               col  = in[j];
322             }
323           } else col = in[j];
324           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
325         }
326       }
327     } else {
328       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
329       if (!baij->donotstash) {
330         if (roworiented) {
331           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
332         } else {
333           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
334         }
335       }
336     }
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatGetValues_MPISBAIJ"
343 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
344 {
345   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
346   PetscErrorCode ierr;
347   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
348   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
349 
350   PetscFunctionBegin;
351   for (i=0; i<m; i++) {
352     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
353     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
354     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
355       row = idxm[i] - bsrstart;
356       for (j=0; j<n; j++) {
357         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
358         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
359         if (idxn[j] >= bscstart && idxn[j] < bscend) {
360           col  = idxn[j] - bscstart;
361           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
362         } else {
363           if (!baij->colmap) {
364             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
365           }
366 #if defined(PETSC_USE_CTABLE)
367           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
368           data--;
369 #else
370           data = baij->colmap[idxn[j]/bs]-1;
371 #endif
372           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
373           else {
374             col  = data + idxn[j]%bs;
375             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
376           }
377         }
378       }
379     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
380   }
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatNorm_MPISBAIJ"
386 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
387 {
388   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
389   PetscErrorCode ierr;
390   PetscReal      sum[2],*lnorm2;
391 
392   PetscFunctionBegin;
393   if (baij->size == 1) {
394     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
395   } else {
396     if (type == NORM_FROBENIUS) {
397       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
398       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
399       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
400       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
401       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
402       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
403       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
404       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
405     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
406       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
407       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
408       PetscReal    *rsum,*rsum2,vabs;
409       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
410       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
411       MatScalar    *v;
412 
413       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
414       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
415       /* Amat */
416       v = amat->a; jj = amat->j;
417       for (brow=0; brow<mbs; brow++) {
418         grow = bs*(rstart + brow);
419         nz   = amat->i[brow+1] - amat->i[brow];
420         for (bcol=0; bcol<nz; bcol++) {
421           gcol = bs*(rstart + *jj); jj++;
422           for (col=0; col<bs; col++) {
423             for (row=0; row<bs; row++) {
424               vabs            = PetscAbsScalar(*v); v++;
425               rsum[gcol+col] += vabs;
426               /* non-diagonal block */
427               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
428             }
429           }
430         }
431       }
432       /* Bmat */
433       v = bmat->a; jj = bmat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz = bmat->i[brow+1] - bmat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*garray[*jj]; jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
449       *norm = 0.0;
450       for (col=0; col<mat->cmap->N; col++) {
451         if (rsum2[col] > *norm) *norm = rsum2[col];
452       }
453       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
454     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
461 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
462 {
463   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
464   PetscErrorCode ierr;
465   PetscInt       nstash,reallocs;
466 
467   PetscFunctionBegin;
468   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
469 
470   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
471   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
472   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
473   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
474   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
475   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
481 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
482 {
483   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
484   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
487   PetscInt       *row,*col;
488   PetscBool      other_disassembled;
489   PetscMPIInt    n;
490   PetscBool      r1,r2,r3;
491   MatScalar      *val;
492 
493   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
494   PetscFunctionBegin;
495   if (!baij->donotstash &&  !mat->nooffprocentries) {
496     while (1) {
497       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
498       if (!flg) break;
499 
500       for (i=0; i<n;) {
501         /* Now identify the consecutive vals belonging to the same row */
502         for (j=i,rstart=row[j]; j<n; j++) {
503           if (row[j] != rstart) break;
504         }
505         if (j < n) ncols = j-i;
506         else       ncols = n-i;
507         /* Now assemble all these values with a single function call */
508         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
509         i    = j;
510       }
511     }
512     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
513     /* Now process the block-stash. Since the values are stashed column-oriented,
514        set the roworiented flag to column oriented, and after MatSetValues()
515        restore the original flags */
516     r1 = baij->roworiented;
517     r2 = a->roworiented;
518     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
519 
520     baij->roworiented = PETSC_FALSE;
521     a->roworiented    = PETSC_FALSE;
522 
523     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
524     while (1) {
525       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
526       if (!flg) break;
527 
528       for (i=0; i<n;) {
529         /* Now identify the consecutive vals belonging to the same row */
530         for (j=i,rstart=row[j]; j<n; j++) {
531           if (row[j] != rstart) break;
532         }
533         if (j < n) ncols = j-i;
534         else       ncols = n-i;
535         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
536         i    = j;
537       }
538     }
539     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
540 
541     baij->roworiented = r1;
542     a->roworiented    = r2;
543 
544     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
545   }
546 
547   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
548   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
549 
550   /* determine if any processor has disassembled, if so we must
551      also disassemble ourselfs, in order that we may reassemble. */
552   /*
553      if nonzero structure of submatrix B cannot change then we know that
554      no processor disassembled thus we can skip this stuff
555   */
556   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
557     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
558     if (mat->was_assembled && !other_disassembled) {
559       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
560     }
561   }
562 
563   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
564     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
565   }
566   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
567   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
568 
569   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
570 
571   baij->rowvalues = 0;
572 
573   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
574   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
575     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
576     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
577   }
578   PetscFunctionReturn(0);
579 }
580 
581 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
582 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
583 #include <petscdraw.h>
584 #undef __FUNCT__
585 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
586 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
587 {
588   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
589   PetscErrorCode    ierr;
590   PetscInt          bs   = mat->rmap->bs;
591   PetscMPIInt       rank = baij->rank;
592   PetscBool         iascii,isdraw;
593   PetscViewer       sviewer;
594   PetscViewerFormat format;
595 
596   PetscFunctionBegin;
597   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
598   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
599   if (iascii) {
600     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
601     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
602       MatInfo info;
603       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
604       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
605       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
606       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
607       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
608       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
609       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
610       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
611       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
612       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
613       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
614       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
615       PetscFunctionReturn(0);
616     } else if (format == PETSC_VIEWER_ASCII_INFO) {
617       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
618       PetscFunctionReturn(0);
619     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
620       PetscFunctionReturn(0);
621     }
622   }
623 
624   if (isdraw) {
625     PetscDraw draw;
626     PetscBool isnull;
627     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
628     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
629   }
630 
631   {
632     /* assemble the entire matrix onto first processor. */
633     Mat          A;
634     Mat_SeqSBAIJ *Aloc;
635     Mat_SeqBAIJ  *Bloc;
636     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
637     MatScalar    *a;
638     const char   *matname;
639 
640     /* Should this be the same type as mat? */
641     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
642     if (!rank) {
643       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
644     } else {
645       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
646     }
647     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
648     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
649     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
650     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
651 
652     /* copy over the A part */
653     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
654     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
655     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
656 
657     for (i=0; i<mbs; i++) {
658       rvals[0] = bs*(baij->rstartbs + i);
659       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
660       for (j=ai[i]; j<ai[i+1]; j++) {
661         col = (baij->cstartbs+aj[j])*bs;
662         for (k=0; k<bs; k++) {
663           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
664           col++;
665           a += bs;
666         }
667       }
668     }
669     /* copy over the B part */
670     Bloc = (Mat_SeqBAIJ*)baij->B->data;
671     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
672     for (i=0; i<mbs; i++) {
673 
674       rvals[0] = bs*(baij->rstartbs + i);
675       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
676       for (j=ai[i]; j<ai[i+1]; j++) {
677         col = baij->garray[aj[j]]*bs;
678         for (k=0; k<bs; k++) {
679           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
680           col++;
681           a += bs;
682         }
683       }
684     }
685     ierr = PetscFree(rvals);CHKERRQ(ierr);
686     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
687     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
688     /*
689        Everyone has to call to draw the matrix since the graphics waits are
690        synchronized across all processors that share the PetscDraw object
691     */
692     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
693     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
694     if (!rank) {
695       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
696       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
697     }
698     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
699     ierr = MatDestroy(&A);CHKERRQ(ierr);
700   }
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatView_MPISBAIJ"
706 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
707 {
708   PetscErrorCode ierr;
709   PetscBool      iascii,isdraw,issocket,isbinary;
710 
711   PetscFunctionBegin;
712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
716   if (iascii || isdraw || issocket || isbinary) {
717     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
718   }
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatDestroy_MPISBAIJ"
724 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
725 {
726   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
727   PetscErrorCode ierr;
728 
729   PetscFunctionBegin;
730 #if defined(PETSC_USE_LOG)
731   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
732 #endif
733   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
734   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
735   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
736   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
737 #if defined(PETSC_USE_CTABLE)
738   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
739 #else
740   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
741 #endif
742   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
743   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
744   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
745   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
746   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
747   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
748   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
749   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
750   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
751   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
752   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
753   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
754   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
755   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
756   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
757 #if defined(PETSC_USE_REAL_MAT_SINGLE)
758   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
761   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
762   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
763   ierr = PetscFree(mat->data);CHKERRQ(ierr);
764 
765   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
766   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
767   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
768   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
769   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
770   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
776 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
777 {
778   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
779   PetscErrorCode ierr;
780   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
781   PetscScalar    *x,*from;
782 
783   PetscFunctionBegin;
784   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
785   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
786 
787   /* diagonal part */
788   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
789   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
790 
791   /* subdiagonal part */
792   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
793 
794   /* copy x into the vec slvec0 */
795   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
796   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
797 
798   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
799   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
800   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
801 
802   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
803   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
804   /* supperdiagonal part */
805   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatMult_MPISBAIJ"
811 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
812 {
813   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
814   PetscErrorCode ierr;
815   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
816   PetscScalar    *x,*from;
817 
818   PetscFunctionBegin;
819   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
820   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
821 
822   /* diagonal part */
823   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
824   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
825 
826   /* subdiagonal part */
827   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
828 
829   /* copy x into the vec slvec0 */
830   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
831   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
832 
833   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
834   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
835   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
836 
837   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
839   /* supperdiagonal part */
840   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 #undef __FUNCT__
845 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
846 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
847 {
848   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
849   PetscErrorCode ierr;
850   PetscInt       nt;
851 
852   PetscFunctionBegin;
853   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
854   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
855 
856   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
857   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
858 
859   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860   /* do diagonal part */
861   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
862   /* do supperdiagonal part */
863   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
865   /* do subdiagonal part */
866   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
867   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
868   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
874 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
875 {
876   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
877   PetscErrorCode ierr;
878   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
879   PetscScalar    *x,*from,zero=0.0;
880 
881   PetscFunctionBegin;
882   /*
883   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
884   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
885   */
886   /* diagonal part */
887   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
888   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
889 
890   /* subdiagonal part */
891   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
892 
893   /* copy x into the vec slvec0 */
894   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
895   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
896   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
897   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
898 
899   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
901   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
902 
903   /* supperdiagonal part */
904   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 #undef __FUNCT__
909 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
910 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
911 {
912   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
913   PetscErrorCode ierr;
914 
915   PetscFunctionBegin;
916   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
917   /* do diagonal part */
918   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
919   /* do supperdiagonal part */
920   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
922 
923   /* do subdiagonal part */
924   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
925   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
926   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
927   PetscFunctionReturn(0);
928 }
929 
930 /*
931   This only works correctly for square matrices where the subblock A->A is the
932    diagonal block
933 */
934 #undef __FUNCT__
935 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
936 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
943   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatScale_MPISBAIJ"
949 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
950 {
951   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
952   PetscErrorCode ierr;
953 
954   PetscFunctionBegin;
955   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
956   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetRow_MPISBAIJ"
962 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
963 {
964   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
965   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
966   PetscErrorCode ierr;
967   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
968   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
969   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
970 
971   PetscFunctionBegin;
972   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
973   mat->getrowactive = PETSC_TRUE;
974 
975   if (!mat->rowvalues && (idx || v)) {
976     /*
977         allocate enough space to hold information from the longest row.
978     */
979     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
980     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
981     PetscInt     max = 1,mbs = mat->mbs,tmp;
982     for (i=0; i<mbs; i++) {
983       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
984       if (max < tmp) max = tmp;
985     }
986     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
987   }
988 
989   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
990   lrow = row - brstart;  /* local row index */
991 
992   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
993   if (!v)   {pvA = 0; pvB = 0;}
994   if (!idx) {pcA = 0; if (!v) pcB = 0;}
995   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
996   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
997   nztot = nzA + nzB;
998 
999   cmap = mat->garray;
1000   if (v  || idx) {
1001     if (nztot) {
1002       /* Sort by increasing column numbers, assuming A and B already sorted */
1003       PetscInt imark = -1;
1004       if (v) {
1005         *v = v_p = mat->rowvalues;
1006         for (i=0; i<nzB; i++) {
1007           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1008           else break;
1009         }
1010         imark = i;
1011         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1012         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1013       }
1014       if (idx) {
1015         *idx = idx_p = mat->rowindices;
1016         if (imark > -1) {
1017           for (i=0; i<imark; i++) {
1018             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1019           }
1020         } else {
1021           for (i=0; i<nzB; i++) {
1022             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1023             else break;
1024           }
1025           imark = i;
1026         }
1027         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1028         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1029       }
1030     } else {
1031       if (idx) *idx = 0;
1032       if (v)   *v   = 0;
1033     }
1034   }
1035   *nz  = nztot;
1036   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1037   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1043 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1044 {
1045   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1046 
1047   PetscFunctionBegin;
1048   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1049   baij->getrowactive = PETSC_FALSE;
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1055 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1056 {
1057   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1058   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1059 
1060   PetscFunctionBegin;
1061   aA->getrow_utriangular = PETSC_TRUE;
1062   PetscFunctionReturn(0);
1063 }
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1066 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1067 {
1068   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1069   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1070 
1071   PetscFunctionBegin;
1072   aA->getrow_utriangular = PETSC_FALSE;
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1078 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1079 {
1080   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1081   PetscErrorCode ierr;
1082 
1083   PetscFunctionBegin;
1084   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1085   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1091 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1092 {
1093   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1098   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1104 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1105 {
1106   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1111   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1117 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1118 {
1119   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1120   Mat            A  = a->A,B = a->B;
1121   PetscErrorCode ierr;
1122   PetscReal      isend[5],irecv[5];
1123 
1124   PetscFunctionBegin;
1125   info->block_size = (PetscReal)matin->rmap->bs;
1126 
1127   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1128 
1129   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1130   isend[3] = info->memory;  isend[4] = info->mallocs;
1131 
1132   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1133 
1134   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1135   isend[3] += info->memory;  isend[4] += info->mallocs;
1136   if (flag == MAT_LOCAL) {
1137     info->nz_used      = isend[0];
1138     info->nz_allocated = isend[1];
1139     info->nz_unneeded  = isend[2];
1140     info->memory       = isend[3];
1141     info->mallocs      = isend[4];
1142   } else if (flag == MAT_GLOBAL_MAX) {
1143     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1144 
1145     info->nz_used      = irecv[0];
1146     info->nz_allocated = irecv[1];
1147     info->nz_unneeded  = irecv[2];
1148     info->memory       = irecv[3];
1149     info->mallocs      = irecv[4];
1150   } else if (flag == MAT_GLOBAL_SUM) {
1151     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1152 
1153     info->nz_used      = irecv[0];
1154     info->nz_allocated = irecv[1];
1155     info->nz_unneeded  = irecv[2];
1156     info->memory       = irecv[3];
1157     info->mallocs      = irecv[4];
1158   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1159   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1160   info->fill_ratio_needed = 0;
1161   info->factor_mallocs    = 0;
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1167 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1168 {
1169   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1170   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1171   PetscErrorCode ierr;
1172 
1173   PetscFunctionBegin;
1174   switch (op) {
1175   case MAT_NEW_NONZERO_LOCATIONS:
1176   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1177   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1178   case MAT_KEEP_NONZERO_PATTERN:
1179   case MAT_NEW_NONZERO_LOCATION_ERR:
1180     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1181     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1182     break;
1183   case MAT_ROW_ORIENTED:
1184     a->roworiented = flg;
1185 
1186     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1187     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1188     break;
1189   case MAT_NEW_DIAGONALS:
1190     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1191     break;
1192   case MAT_IGNORE_OFF_PROC_ENTRIES:
1193     a->donotstash = flg;
1194     break;
1195   case MAT_USE_HASH_TABLE:
1196     a->ht_flag = flg;
1197     break;
1198   case MAT_HERMITIAN:
1199     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1200     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1201 
1202     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1203     break;
1204   case MAT_SPD:
1205     A->spd_set = PETSC_TRUE;
1206     A->spd     = flg;
1207     if (flg) {
1208       A->symmetric                  = PETSC_TRUE;
1209       A->structurally_symmetric     = PETSC_TRUE;
1210       A->symmetric_set              = PETSC_TRUE;
1211       A->structurally_symmetric_set = PETSC_TRUE;
1212     }
1213     break;
1214   case MAT_SYMMETRIC:
1215     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1216     break;
1217   case MAT_STRUCTURALLY_SYMMETRIC:
1218     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1219     break;
1220   case MAT_SYMMETRY_ETERNAL:
1221     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1222     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1223     break;
1224   case MAT_IGNORE_LOWER_TRIANGULAR:
1225     aA->ignore_ltriangular = flg;
1226     break;
1227   case MAT_ERROR_LOWER_TRIANGULAR:
1228     aA->ignore_ltriangular = flg;
1229     break;
1230   case MAT_GETROW_UPPERTRIANGULAR:
1231     aA->getrow_utriangular = flg;
1232     break;
1233   default:
1234     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1235   }
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1241 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1242 {
1243   PetscErrorCode ierr;
1244 
1245   PetscFunctionBegin;
1246   if (MAT_INITIAL_MATRIX || *B != A) {
1247     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1248   }
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1254 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1255 {
1256   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1257   Mat            a     = baij->A, b=baij->B;
1258   PetscErrorCode ierr;
1259   PetscInt       nv,m,n;
1260   PetscBool      flg;
1261 
1262   PetscFunctionBegin;
1263   if (ll != rr) {
1264     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1265     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1266   }
1267   if (!ll) PetscFunctionReturn(0);
1268 
1269   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1270   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1271 
1272   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1273   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1274 
1275   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276 
1277   /* left diagonalscale the off-diagonal part */
1278   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1279 
1280   /* scale the diagonal part */
1281   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1282 
1283   /* right diagonalscale the off-diagonal part */
1284   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1285   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1291 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1292 {
1293   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295 
1296   PetscFunctionBegin;
1297   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatEqual_MPISBAIJ"
1305 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1306 {
1307   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1308   Mat            a,b,c,d;
1309   PetscBool      flg;
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   a = matA->A; b = matA->B;
1314   c = matB->A; d = matB->B;
1315 
1316   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1317   if (flg) {
1318     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1319   }
1320   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatCopy_MPISBAIJ"
1326 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1327 {
1328   PetscErrorCode ierr;
1329   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1330   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1331 
1332   PetscFunctionBegin;
1333   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1334   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1335     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1336     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1337     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1338   } else {
1339     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1340     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1341   }
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1347 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1348 {
1349   PetscErrorCode ierr;
1350 
1351   PetscFunctionBegin;
1352   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1358 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1359 {
1360   PetscErrorCode ierr;
1361   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1362   PetscBLASInt   bnz,one=1;
1363   Mat_SeqSBAIJ   *xa,*ya;
1364   Mat_SeqBAIJ    *xb,*yb;
1365 
1366   PetscFunctionBegin;
1367   if (str == SAME_NONZERO_PATTERN) {
1368     PetscScalar alpha = a;
1369     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1370     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1371     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1372     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1373     xb   = (Mat_SeqBAIJ*)xx->B->data;
1374     yb   = (Mat_SeqBAIJ*)yy->B->data;
1375     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1376     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1377     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1378   } else {
1379     Mat      B;
1380     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1381     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1382     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1383     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1384     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1385     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1386     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1387     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1388     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1389     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1390     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1391     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1392     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1393     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1394     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1395     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1396     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1397     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1398     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1406 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1407 {
1408   PetscErrorCode ierr;
1409   PetscInt       i;
1410   PetscBool      flg;
1411 
1412   PetscFunctionBegin;
1413   for (i=0; i<n; i++) {
1414     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1415     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1416   }
1417   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 
1422 /* -------------------------------------------------------------------*/
1423 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1424                                        MatGetRow_MPISBAIJ,
1425                                        MatRestoreRow_MPISBAIJ,
1426                                        MatMult_MPISBAIJ,
1427                                /*  4*/ MatMultAdd_MPISBAIJ,
1428                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1429                                        MatMultAdd_MPISBAIJ,
1430                                        0,
1431                                        0,
1432                                        0,
1433                                /* 10*/ 0,
1434                                        0,
1435                                        0,
1436                                        MatSOR_MPISBAIJ,
1437                                        MatTranspose_MPISBAIJ,
1438                                /* 15*/ MatGetInfo_MPISBAIJ,
1439                                        MatEqual_MPISBAIJ,
1440                                        MatGetDiagonal_MPISBAIJ,
1441                                        MatDiagonalScale_MPISBAIJ,
1442                                        MatNorm_MPISBAIJ,
1443                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1444                                        MatAssemblyEnd_MPISBAIJ,
1445                                        MatSetOption_MPISBAIJ,
1446                                        MatZeroEntries_MPISBAIJ,
1447                                /* 24*/ 0,
1448                                        0,
1449                                        0,
1450                                        0,
1451                                        0,
1452                                /* 29*/ MatSetUp_MPISBAIJ,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        0,
1457                                /* 34*/ MatDuplicate_MPISBAIJ,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                /* 39*/ MatAXPY_MPISBAIJ,
1463                                        MatGetSubMatrices_MPISBAIJ,
1464                                        MatIncreaseOverlap_MPISBAIJ,
1465                                        MatGetValues_MPISBAIJ,
1466                                        MatCopy_MPISBAIJ,
1467                                /* 44*/ 0,
1468                                        MatScale_MPISBAIJ,
1469                                        0,
1470                                        0,
1471                                        0,
1472                                /* 49*/ 0,
1473                                        0,
1474                                        0,
1475                                        0,
1476                                        0,
1477                                /* 54*/ 0,
1478                                        0,
1479                                        MatSetUnfactored_MPISBAIJ,
1480                                        0,
1481                                        MatSetValuesBlocked_MPISBAIJ,
1482                                /* 59*/ 0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        0,
1487                                /* 64*/ 0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                /* 74*/ 0,
1498                                        0,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                /* 79*/ 0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        MatLoad_MPISBAIJ,
1507                                /* 84*/ 0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                /* 89*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                /* 94*/ 0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                /* 99*/ 0,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                /*104*/ 0,
1528                                        MatRealPart_MPISBAIJ,
1529                                        MatImaginaryPart_MPISBAIJ,
1530                                        MatGetRowUpperTriangular_MPISBAIJ,
1531                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1532                                /*109*/ 0,
1533                                        0,
1534                                        0,
1535                                        0,
1536                                        0,
1537                                /*114*/ 0,
1538                                        0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                /*119*/ 0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                /*124*/ 0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                /*129*/ 0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                /*134*/ 0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                /*139*/ 0,
1563                                        0,
1564                                        0
1565 };
1566 
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1570 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1571 {
1572   PetscFunctionBegin;
1573   *a = ((Mat_MPISBAIJ*)A->data)->A;
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1579 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1580 {
1581   Mat_MPISBAIJ   *b;
1582   PetscErrorCode ierr;
1583   PetscInt       i,mbs,Mbs;
1584 
1585   PetscFunctionBegin;
1586   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1587   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1588   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1589   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1590 
1591   b   = (Mat_MPISBAIJ*)B->data;
1592   mbs = B->rmap->n/bs;
1593   Mbs = B->rmap->N/bs;
1594   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1595 
1596   B->rmap->bs = bs;
1597   b->bs2      = bs*bs;
1598   b->mbs      = mbs;
1599   b->nbs      = mbs;
1600   b->Mbs      = Mbs;
1601   b->Nbs      = Mbs;
1602 
1603   for (i=0; i<=b->size; i++) {
1604     b->rangebs[i] = B->rmap->range[i]/bs;
1605   }
1606   b->rstartbs = B->rmap->rstart/bs;
1607   b->rendbs   = B->rmap->rend/bs;
1608 
1609   b->cstartbs = B->cmap->rstart/bs;
1610   b->cendbs   = B->cmap->rend/bs;
1611 
1612   if (!B->preallocated) {
1613     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1614     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1615     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1616     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1617     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1618     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1619     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1620     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1621     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1622   }
1623 
1624   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1625   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1626 
1627   B->preallocated = PETSC_TRUE;
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1633 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1634 {
1635   PetscInt       m,rstart,cstart,cend;
1636   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1637   const PetscInt *JJ    =0;
1638   PetscScalar    *values=0;
1639   PetscErrorCode ierr;
1640 
1641   PetscFunctionBegin;
1642   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1643   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1644   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1645   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1646   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1647   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1648   m      = B->rmap->n/bs;
1649   rstart = B->rmap->rstart/bs;
1650   cstart = B->cmap->rstart/bs;
1651   cend   = B->cmap->rend/bs;
1652 
1653   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1654   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1655   for (i=0; i<m; i++) {
1656     nz = ii[i+1] - ii[i];
1657     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1658     nz_max = PetscMax(nz_max,nz);
1659     JJ     = jj + ii[i];
1660     for (j=0; j<nz; j++) {
1661       if (*JJ >= cstart) break;
1662       JJ++;
1663     }
1664     d = 0;
1665     for (; j<nz; j++) {
1666       if (*JJ++ >= cend) break;
1667       d++;
1668     }
1669     d_nnz[i] = d;
1670     o_nnz[i] = nz - d;
1671   }
1672   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1673   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1674 
1675   values = (PetscScalar*)V;
1676   if (!values) {
1677     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1678     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1679   }
1680   for (i=0; i<m; i++) {
1681     PetscInt          row    = i + rstart;
1682     PetscInt          ncols  = ii[i+1] - ii[i];
1683     const PetscInt    *icols = jj + ii[i];
1684     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1685     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1686   }
1687 
1688   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #if defined(PETSC_HAVE_MUMPS)
1696 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1697 #endif
1698 #if defined(PETSC_HAVE_PASTIX)
1699 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1700 #endif
1701 
1702 /*MC
1703    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1704    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1705    the matrix is stored.
1706 
1707   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1708   can call MatSetOption(Mat, MAT_HERMITIAN);
1709 
1710    Options Database Keys:
1711 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1712 
1713   Level: beginner
1714 
1715 .seealso: MatCreateMPISBAIJ
1716 M*/
1717 
1718 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatCreate_MPISBAIJ"
1722 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1723 {
1724   Mat_MPISBAIJ   *b;
1725   PetscErrorCode ierr;
1726   PetscBool      flg;
1727 
1728   PetscFunctionBegin;
1729   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1730   B->data = (void*)b;
1731   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1732 
1733   B->ops->destroy = MatDestroy_MPISBAIJ;
1734   B->ops->view    = MatView_MPISBAIJ;
1735   B->assembled    = PETSC_FALSE;
1736   B->insertmode   = NOT_SET_VALUES;
1737 
1738   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1739   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1740 
1741   /* build local table of row and column ownerships */
1742   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1743 
1744   /* build cache for off array entries formed */
1745   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1746 
1747   b->donotstash  = PETSC_FALSE;
1748   b->colmap      = NULL;
1749   b->garray      = NULL;
1750   b->roworiented = PETSC_TRUE;
1751 
1752   /* stuff used in block assembly */
1753   b->barray = 0;
1754 
1755   /* stuff used for matrix vector multiply */
1756   b->lvec    = 0;
1757   b->Mvctx   = 0;
1758   b->slvec0  = 0;
1759   b->slvec0b = 0;
1760   b->slvec1  = 0;
1761   b->slvec1a = 0;
1762   b->slvec1b = 0;
1763   b->sMvctx  = 0;
1764 
1765   /* stuff for MatGetRow() */
1766   b->rowindices   = 0;
1767   b->rowvalues    = 0;
1768   b->getrowactive = PETSC_FALSE;
1769 
1770   /* hash table stuff */
1771   b->ht           = 0;
1772   b->hd           = 0;
1773   b->ht_size      = 0;
1774   b->ht_flag      = PETSC_FALSE;
1775   b->ht_fact      = 0;
1776   b->ht_total_ct  = 0;
1777   b->ht_insert_ct = 0;
1778 
1779   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1780   b->ijonly = PETSC_FALSE;
1781 
1782   b->in_loc = 0;
1783   b->v_loc  = 0;
1784   b->n_loc  = 0;
1785   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1786   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1787   if (flg) {
1788     PetscReal fact = 1.39;
1789     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1790     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1791     if (fact <= 1.0) fact = 1.39;
1792     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1793     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1794   }
1795   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1796 
1797 #if defined(PETSC_HAVE_PASTIX)
1798   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1799 #endif
1800 #if defined(PETSC_HAVE_MUMPS)
1801   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1802 #endif
1803   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1804   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1809 
1810   B->symmetric                  = PETSC_TRUE;
1811   B->structurally_symmetric     = PETSC_TRUE;
1812   B->symmetric_set              = PETSC_TRUE;
1813   B->structurally_symmetric_set = PETSC_TRUE;
1814 
1815   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 /*MC
1820    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1821 
1822    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1823    and MATMPISBAIJ otherwise.
1824 
1825    Options Database Keys:
1826 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1827 
1828   Level: beginner
1829 
1830 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1831 M*/
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1835 /*@C
1836    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1837    the user should preallocate the matrix storage by setting the parameters
1838    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1839    performance can be increased by more than a factor of 50.
1840 
1841    Collective on Mat
1842 
1843    Input Parameters:
1844 +  B - the matrix
1845 .  bs   - size of blockk
1846 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1847            submatrix  (same for all local rows)
1848 .  d_nnz - array containing the number of block nonzeros in the various block rows
1849            in the upper triangular and diagonal part of the in diagonal portion of the local
1850            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1851            for the diagonal entry and set a value even if it is zero.
1852 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1853            submatrix (same for all local rows).
1854 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1855            off-diagonal portion of the local submatrix that is right of the diagonal
1856            (possibly different for each block row) or NULL.
1857 
1858 
1859    Options Database Keys:
1860 .   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 
1864    Notes:
1865 
1866    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1867    than it must be used on all processors that share the object for that argument.
1868 
1869    If the *_nnz parameter is given then the *_nz parameter is ignored
1870 
1871    Storage Information:
1872    For a square global matrix we define each processor's diagonal portion
1873    to be its local rows and the corresponding columns (a square submatrix);
1874    each processor's off-diagonal portion encompasses the remainder of the
1875    local matrix (a rectangular submatrix).
1876 
1877    The user can specify preallocated storage for the diagonal part of
1878    the local submatrix with either d_nz or d_nnz (not both).  Set
1879    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1880    memory allocation.  Likewise, specify preallocated storage for the
1881    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1882 
1883    You can call MatGetInfo() to get information on how effective the preallocation was;
1884    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1885    You can also run with the option -info and look for messages with the string
1886    malloc in them to see if additional memory allocation was needed.
1887 
1888    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1889    the figure below we depict these three local rows and all columns (0-11).
1890 
1891 .vb
1892            0 1 2 3 4 5 6 7 8 9 10 11
1893           --------------------------
1894    row 3  |. . . d d d o o o o  o  o
1895    row 4  |. . . d d d o o o o  o  o
1896    row 5  |. . . d d d o o o o  o  o
1897           --------------------------
1898 .ve
1899 
1900    Thus, any entries in the d locations are stored in the d (diagonal)
1901    submatrix, and any entries in the o locations are stored in the
1902    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1903    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1904 
1905    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1906    plus the diagonal part of the d matrix,
1907    and o_nz should indicate the number of block nonzeros per row in the o matrix
1908 
1909    In general, for PDE problems in which most nonzeros are near the diagonal,
1910    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1911    or you will get TERRIBLE performance; see the users' manual chapter on
1912    matrices.
1913 
1914    Level: intermediate
1915 
1916 .keywords: matrix, block, aij, compressed row, sparse, parallel
1917 
1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1919 @*/
1920 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1921 {
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1926   PetscValidType(B,1);
1927   PetscValidLogicalCollectiveInt(B,bs,2);
1928   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatCreateSBAIJ"
1934 /*@C
1935    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1936    (block compressed row).  For good matrix assembly performance
1937    the user should preallocate the matrix storage by setting the parameters
1938    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1939    performance can be increased by more than a factor of 50.
1940 
1941    Collective on MPI_Comm
1942 
1943    Input Parameters:
1944 +  comm - MPI communicator
1945 .  bs   - size of blockk
1946 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1947            This value should be the same as the local size used in creating the
1948            y vector for the matrix-vector product y = Ax.
1949 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1950            This value should be the same as the local size used in creating the
1951            x vector for the matrix-vector product y = Ax.
1952 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1953 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1954 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1955            submatrix  (same for all local rows)
1956 .  d_nnz - array containing the number of block nonzeros in the various block rows
1957            in the upper triangular portion of the in diagonal portion of the local
1958            (possibly different for each block block row) or NULL.
1959            If you plan to factor the matrix you must leave room for the diagonal entry and
1960            set its value even if it is zero.
1961 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1962            submatrix (same for all local rows).
1963 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1964            off-diagonal portion of the local submatrix (possibly different for
1965            each block row) or NULL.
1966 
1967    Output Parameter:
1968 .  A - the matrix
1969 
1970    Options Database Keys:
1971 .   -mat_no_unroll - uses code that does not unroll the loops in the
1972                      block calculations (much slower)
1973 .   -mat_block_size - size of the blocks to use
1974 .   -mat_mpi - use the parallel matrix data structures even on one processor
1975                (defaults to using SeqBAIJ format on one processor)
1976 
1977    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1978    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1979    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1980 
1981    Notes:
1982    The number of rows and columns must be divisible by blocksize.
1983    This matrix type does not support complex Hermitian operation.
1984 
1985    The user MUST specify either the local or global matrix dimensions
1986    (possibly both).
1987 
1988    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1989    than it must be used on all processors that share the object for that argument.
1990 
1991    If the *_nnz parameter is given then the *_nz parameter is ignored
1992 
1993    Storage Information:
1994    For a square global matrix we define each processor's diagonal portion
1995    to be its local rows and the corresponding columns (a square submatrix);
1996    each processor's off-diagonal portion encompasses the remainder of the
1997    local matrix (a rectangular submatrix).
1998 
1999    The user can specify preallocated storage for the diagonal part of
2000    the local submatrix with either d_nz or d_nnz (not both).  Set
2001    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2002    memory allocation.  Likewise, specify preallocated storage for the
2003    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2004 
2005    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2006    the figure below we depict these three local rows and all columns (0-11).
2007 
2008 .vb
2009            0 1 2 3 4 5 6 7 8 9 10 11
2010           --------------------------
2011    row 3  |. . . d d d o o o o  o  o
2012    row 4  |. . . d d d o o o o  o  o
2013    row 5  |. . . d d d o o o o  o  o
2014           --------------------------
2015 .ve
2016 
2017    Thus, any entries in the d locations are stored in the d (diagonal)
2018    submatrix, and any entries in the o locations are stored in the
2019    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2020    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2021 
2022    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2023    plus the diagonal part of the d matrix,
2024    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2025    In general, for PDE problems in which most nonzeros are near the diagonal,
2026    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2027    or you will get TERRIBLE performance; see the users' manual chapter on
2028    matrices.
2029 
2030    Level: intermediate
2031 
2032 .keywords: matrix, block, aij, compressed row, sparse, parallel
2033 
2034 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2035 @*/
2036 
2037 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2038 {
2039   PetscErrorCode ierr;
2040   PetscMPIInt    size;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2044   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2046   if (size > 1) {
2047     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2048     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2049   } else {
2050     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2051     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2052   }
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2059 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2060 {
2061   Mat            mat;
2062   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2063   PetscErrorCode ierr;
2064   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2065   PetscScalar    *array;
2066 
2067   PetscFunctionBegin;
2068   *newmat = 0;
2069 
2070   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2071   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2072   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2073   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2074   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2075   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2076 
2077   mat->factortype   = matin->factortype;
2078   mat->preallocated = PETSC_TRUE;
2079   mat->assembled    = PETSC_TRUE;
2080   mat->insertmode   = NOT_SET_VALUES;
2081 
2082   a      = (Mat_MPISBAIJ*)mat->data;
2083   a->bs2 = oldmat->bs2;
2084   a->mbs = oldmat->mbs;
2085   a->nbs = oldmat->nbs;
2086   a->Mbs = oldmat->Mbs;
2087   a->Nbs = oldmat->Nbs;
2088 
2089 
2090   a->size         = oldmat->size;
2091   a->rank         = oldmat->rank;
2092   a->donotstash   = oldmat->donotstash;
2093   a->roworiented  = oldmat->roworiented;
2094   a->rowindices   = 0;
2095   a->rowvalues    = 0;
2096   a->getrowactive = PETSC_FALSE;
2097   a->barray       = 0;
2098   a->rstartbs     = oldmat->rstartbs;
2099   a->rendbs       = oldmat->rendbs;
2100   a->cstartbs     = oldmat->cstartbs;
2101   a->cendbs       = oldmat->cendbs;
2102 
2103   /* hash table stuff */
2104   a->ht           = 0;
2105   a->hd           = 0;
2106   a->ht_size      = 0;
2107   a->ht_flag      = oldmat->ht_flag;
2108   a->ht_fact      = oldmat->ht_fact;
2109   a->ht_total_ct  = 0;
2110   a->ht_insert_ct = 0;
2111 
2112   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2113   if (oldmat->colmap) {
2114 #if defined(PETSC_USE_CTABLE)
2115     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2116 #else
2117     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2118     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2119     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2120 #endif
2121   } else a->colmap = 0;
2122 
2123   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2124     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2125     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2126     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2127   } else a->garray = 0;
2128 
2129   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2130   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2132   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2134 
2135   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2137   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2139 
2140   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2141   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2142   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2143   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2144   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2145   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2146   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2147   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2148   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2151   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2153 
2154   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2155   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2156   a->sMvctx = oldmat->sMvctx;
2157   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2158 
2159   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2160   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2161   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2162   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2163   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2164   *newmat = mat;
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatLoad_MPISBAIJ"
2170 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2171 {
2172   PetscErrorCode ierr;
2173   PetscInt       i,nz,j,rstart,rend;
2174   PetscScalar    *vals,*buf;
2175   MPI_Comm       comm;
2176   MPI_Status     status;
2177   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2178   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2179   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2180   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2181   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2182   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2183   int            fd;
2184 
2185   PetscFunctionBegin;
2186   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2187   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2188   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2189   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2190   if (bs < 0) bs = 1;
2191 
2192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2193   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2194   if (!rank) {
2195     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2196     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2197     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2198     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2199   }
2200 
2201   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2202 
2203   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2204   M    = header[1];
2205   N    = header[2];
2206 
2207   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2208   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2209   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2210 
2211   /* If global sizes are set, check if they are consistent with that given in the file */
2212   if (sizesset) {
2213     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2214   }
2215   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2216   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2217 
2218   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2219 
2220   /*
2221      This code adds extra rows to make sure the number of rows is
2222      divisible by the blocksize
2223   */
2224   Mbs        = M/bs;
2225   extra_rows = bs - M + bs*(Mbs);
2226   if (extra_rows == bs) extra_rows = 0;
2227   else                  Mbs++;
2228   if (extra_rows &&!rank) {
2229     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2230   }
2231 
2232   /* determine ownership of all rows */
2233   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2234     mbs = Mbs/size + ((Mbs % size) > rank);
2235     m   = mbs*bs;
2236   } else { /* User Set */
2237     m   = newmat->rmap->n;
2238     mbs = m/bs;
2239   }
2240   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2241   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2242   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2243   rowners[0] = 0;
2244   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2245   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2246   rstart = rowners[rank];
2247   rend   = rowners[rank+1];
2248 
2249   /* distribute row lengths to all processors */
2250   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2251   if (!rank) {
2252     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2253     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2254     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2255     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2256     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2257     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2258     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2259   } else {
2260     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2261   }
2262 
2263   if (!rank) {   /* procs[0] */
2264     /* calculate the number of nonzeros on each processor */
2265     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2266     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2267     for (i=0; i<size; i++) {
2268       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2269         procsnz[i] += rowlengths[j];
2270       }
2271     }
2272     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2273 
2274     /* determine max buffer needed and allocate it */
2275     maxnz = 0;
2276     for (i=0; i<size; i++) {
2277       maxnz = PetscMax(maxnz,procsnz[i]);
2278     }
2279     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2280 
2281     /* read in my part of the matrix column indices  */
2282     nz     = procsnz[0];
2283     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2284     mycols = ibuf;
2285     if (size == 1) nz -= extra_rows;
2286     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2287     if (size == 1) {
2288       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2289     }
2290 
2291     /* read in every ones (except the last) and ship off */
2292     for (i=1; i<size-1; i++) {
2293       nz   = procsnz[i];
2294       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2295       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2296     }
2297     /* read in the stuff for the last proc */
2298     if (size != 1) {
2299       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2300       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2301       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2302       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2303     }
2304     ierr = PetscFree(cols);CHKERRQ(ierr);
2305   } else {  /* procs[i], i>0 */
2306     /* determine buffer space needed for message */
2307     nz = 0;
2308     for (i=0; i<m; i++) nz += locrowlens[i];
2309     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2310     mycols = ibuf;
2311     /* receive message of column indices*/
2312     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2313     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2314     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2315   }
2316 
2317   /* loop over local rows, determining number of off diagonal entries */
2318   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2319   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2320   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2321   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2322   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2323   rowcount = 0;
2324   nzcount  = 0;
2325   for (i=0; i<mbs; i++) {
2326     dcount  = 0;
2327     odcount = 0;
2328     for (j=0; j<bs; j++) {
2329       kmax = locrowlens[rowcount];
2330       for (k=0; k<kmax; k++) {
2331         tmp = mycols[nzcount++]/bs; /* block col. index */
2332         if (!mask[tmp]) {
2333           mask[tmp] = 1;
2334           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2335           else masked1[dcount++] = tmp; /* entry in diag portion */
2336         }
2337       }
2338       rowcount++;
2339     }
2340 
2341     dlens[i]  = dcount;  /* d_nzz[i] */
2342     odlens[i] = odcount; /* o_nzz[i] */
2343 
2344     /* zero out the mask elements we set */
2345     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2346     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2347   }
2348   if (!sizesset) {
2349     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2350   }
2351   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2352   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2353 
2354   if (!rank) {
2355     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2356     /* read in my part of the matrix numerical values  */
2357     nz     = procsnz[0];
2358     vals   = buf;
2359     mycols = ibuf;
2360     if (size == 1) nz -= extra_rows;
2361     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2362     if (size == 1) {
2363       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2364     }
2365 
2366     /* insert into matrix */
2367     jj = rstart*bs;
2368     for (i=0; i<m; i++) {
2369       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2370       mycols += locrowlens[i];
2371       vals   += locrowlens[i];
2372       jj++;
2373     }
2374 
2375     /* read in other processors (except the last one) and ship out */
2376     for (i=1; i<size-1; i++) {
2377       nz   = procsnz[i];
2378       vals = buf;
2379       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2380       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2381     }
2382     /* the last proc */
2383     if (size != 1) {
2384       nz   = procsnz[i] - extra_rows;
2385       vals = buf;
2386       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2387       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2388       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2389     }
2390     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2391 
2392   } else {
2393     /* receive numeric values */
2394     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2395 
2396     /* receive message of values*/
2397     vals   = buf;
2398     mycols = ibuf;
2399     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2400     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2401     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2402 
2403     /* insert into matrix */
2404     jj = rstart*bs;
2405     for (i=0; i<m; i++) {
2406       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2407       mycols += locrowlens[i];
2408       vals   += locrowlens[i];
2409       jj++;
2410     }
2411   }
2412 
2413   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2414   ierr = PetscFree(buf);CHKERRQ(ierr);
2415   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2416   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2417   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2418   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2419   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2420   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2426 /*XXXXX@
2427    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2428 
2429    Input Parameters:
2430 .  mat  - the matrix
2431 .  fact - factor
2432 
2433    Not Collective on Mat, each process can have a different hash factor
2434 
2435    Level: advanced
2436 
2437   Notes:
2438    This can also be set by the command line option: -mat_use_hash_table fact
2439 
2440 .keywords: matrix, hashtable, factor, HT
2441 
2442 .seealso: MatSetOption()
2443 @XXXXX*/
2444 
2445 
2446 #undef __FUNCT__
2447 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2448 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2449 {
2450   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2451   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2452   PetscReal      atmp;
2453   PetscReal      *work,*svalues,*rvalues;
2454   PetscErrorCode ierr;
2455   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2456   PetscMPIInt    rank,size;
2457   PetscInt       *rowners_bs,dest,count,source;
2458   PetscScalar    *va;
2459   MatScalar      *ba;
2460   MPI_Status     stat;
2461 
2462   PetscFunctionBegin;
2463   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2464   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2465   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2466 
2467   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2468   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2469 
2470   bs  = A->rmap->bs;
2471   mbs = a->mbs;
2472   Mbs = a->Mbs;
2473   ba  = b->a;
2474   bi  = b->i;
2475   bj  = b->j;
2476 
2477   /* find ownerships */
2478   rowners_bs = A->rmap->range;
2479 
2480   /* each proc creates an array to be distributed */
2481   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2482   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2483 
2484   /* row_max for B */
2485   if (rank != size-1) {
2486     for (i=0; i<mbs; i++) {
2487       ncols = bi[1] - bi[0]; bi++;
2488       brow  = bs*i;
2489       for (j=0; j<ncols; j++) {
2490         bcol = bs*(*bj);
2491         for (kcol=0; kcol<bs; kcol++) {
2492           col  = bcol + kcol;                /* local col index */
2493           col += rowners_bs[rank+1];      /* global col index */
2494           for (krow=0; krow<bs; krow++) {
2495             atmp = PetscAbsScalar(*ba); ba++;
2496             row  = brow + krow;   /* local row index */
2497             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2498             if (work[col] < atmp) work[col] = atmp;
2499           }
2500         }
2501         bj++;
2502       }
2503     }
2504 
2505     /* send values to its owners */
2506     for (dest=rank+1; dest<size; dest++) {
2507       svalues = work + rowners_bs[dest];
2508       count   = rowners_bs[dest+1]-rowners_bs[dest];
2509       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2510     }
2511   }
2512 
2513   /* receive values */
2514   if (rank) {
2515     rvalues = work;
2516     count   = rowners_bs[rank+1]-rowners_bs[rank];
2517     for (source=0; source<rank; source++) {
2518       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2519       /* process values */
2520       for (i=0; i<count; i++) {
2521         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2522       }
2523     }
2524   }
2525 
2526   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2527   ierr = PetscFree(work);CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatSOR_MPISBAIJ"
2533 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2534 {
2535   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2536   PetscErrorCode    ierr;
2537   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2538   PetscScalar       *x,*ptr,*from;
2539   Vec               bb1;
2540   const PetscScalar *b;
2541 
2542   PetscFunctionBegin;
2543   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2544   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2545 
2546   if (flag == SOR_APPLY_UPPER) {
2547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2548     PetscFunctionReturn(0);
2549   }
2550 
2551   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2552     if (flag & SOR_ZERO_INITIAL_GUESS) {
2553       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2554       its--;
2555     }
2556 
2557     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2558     while (its--) {
2559 
2560       /* lower triangular part: slvec0b = - B^T*xx */
2561       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2562 
2563       /* copy xx into slvec0a */
2564       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2565       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2566       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2567       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2568 
2569       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2570 
2571       /* copy bb into slvec1a */
2572       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2573       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2574       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2575       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2576 
2577       /* set slvec1b = 0 */
2578       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2579 
2580       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2581       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2582       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2583       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2584 
2585       /* upper triangular part: bb1 = bb1 - B*x */
2586       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2587 
2588       /* local diagonal sweep */
2589       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2590     }
2591     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2592   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2593     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2594   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2595     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2596   } else if (flag & SOR_EISENSTAT) {
2597     Vec               xx1;
2598     PetscBool         hasop;
2599     const PetscScalar *diag;
2600     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2601     PetscInt          i,n;
2602 
2603     if (!mat->xx1) {
2604       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2605       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2606     }
2607     xx1 = mat->xx1;
2608     bb1 = mat->bb1;
2609 
2610     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2611 
2612     if (!mat->diag) {
2613       /* this is wrong for same matrix with new nonzero values */
2614       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2615       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2616     }
2617     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2618 
2619     if (hasop) {
2620       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2621       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2622     } else {
2623       /*
2624           These two lines are replaced by code that may be a bit faster for a good compiler
2625       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2626       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2627       */
2628       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2629       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2630       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2631       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2632       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2633       if (omega == 1.0) {
2634         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2635         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2636       } else {
2637         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2638         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2639       }
2640       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2641       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2642       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2643       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2644     }
2645 
2646     /* multiply off-diagonal portion of matrix */
2647     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2648     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2649     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2650     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2651     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2652     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2653     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2654     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2655     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2656     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2657 
2658     /* local sweep */
2659     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2660     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2661   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2667 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2668 {
2669   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2670   PetscErrorCode ierr;
2671   Vec            lvec1,bb1;
2672 
2673   PetscFunctionBegin;
2674   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2675   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2676 
2677   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2678     if (flag & SOR_ZERO_INITIAL_GUESS) {
2679       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2680       its--;
2681     }
2682 
2683     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2684     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687 
2688       /* lower diagonal part: bb1 = bb - B^T*xx */
2689       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2690       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2691 
2692       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2693       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2694       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2695 
2696       /* upper diagonal part: bb1 = bb1 - B*x */
2697       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2698       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2699 
2700       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2701 
2702       /* diagonal sweep */
2703       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2704     }
2705     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2706     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2707   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2708   PetscFunctionReturn(0);
2709 }
2710 
2711 #undef __FUNCT__
2712 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2713 /*@
2714      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2715          CSR format the local rows.
2716 
2717    Collective on MPI_Comm
2718 
2719    Input Parameters:
2720 +  comm - MPI communicator
2721 .  bs - the block size, only a block size of 1 is supported
2722 .  m - number of local rows (Cannot be PETSC_DECIDE)
2723 .  n - This value should be the same as the local size used in creating the
2724        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2725        calculated if N is given) For square matrices n is almost always m.
2726 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2727 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2728 .   i - row indices
2729 .   j - column indices
2730 -   a - matrix values
2731 
2732    Output Parameter:
2733 .   mat - the matrix
2734 
2735    Level: intermediate
2736 
2737    Notes:
2738        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2739      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2740      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2741 
2742        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2743 
2744 .keywords: matrix, aij, compressed row, sparse, parallel
2745 
2746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2747           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2748 @*/
2749 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2750 {
2751   PetscErrorCode ierr;
2752 
2753 
2754   PetscFunctionBegin;
2755   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2756   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2757   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2758   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2759   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2760   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2767 /*@C
2768    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2769    (the default parallel PETSc format).
2770 
2771    Collective on MPI_Comm
2772 
2773    Input Parameters:
2774 +  B - the matrix
2775 .  bs - the block size
2776 .  i - the indices into j for the start of each local row (starts with zero)
2777 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2778 -  v - optional values in the matrix
2779 
2780    Level: developer
2781 
2782 .keywords: matrix, aij, compressed row, sparse, parallel
2783 
2784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2785 @*/
2786 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2787 {
2788   PetscErrorCode ierr;
2789 
2790   PetscFunctionBegin;
2791   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 
2795 
2796